SQLContext

Instance Constructors

new SQLContext(sparkContext: SparkContext)

Type Members

implicit class DslAttribute extends AnyRef

Definition Classes
ExpressionConversions
implicit class DslExpression extends ImplicitOperators

Definition Classes
ExpressionConversions
implicit class DslString extends ImplicitOperators

Definition Classes
ExpressionConversions
implicit class DslSymbol extends ImplicitAttribute

Definition Classes
ExpressionConversions
abstract class ImplicitAttribute extends ImplicitOperators

Definition Classes
ExpressionConversions
abstract class QueryExecution extends AnyRef

:: DeveloperApi :: The primary workflow for executing relational queries using Spark.
class SparkPlanner extends SparkStrategies

Attributes
protected[org.apache.spark.sql]

Value Members

final def !=(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def !=(arg0: Any): Boolean

Definition Classes
Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def ==(arg0: Any): Boolean

Definition Classes
Any
lazy val analyzer: Analyzer

Attributes
protected[org.apache.spark.sql]

def applySchema(rowRDD: RDD[Row], schema: StructType): SchemaRDD

:: DeveloperApi :: Creates a SchemaRDD from an RDD containing Rows by applying a schema to this RDD.

:: DeveloperApi :: Creates a SchemaRDD from an RDD containing Rows by applying a schema to this RDD. It is important to make sure that the structure of every Row of the provided RDD matches the provided schema. Otherwise, a runtime exception will be thrown. Example:

import org.apache.spark.sql._
val sqlContext = new org.apache.spark.sql.SQLContext(sc)

val schema =
  StructType(
    StructField("name", StringType, false) ::
    StructField("age", IntegerType, true) :: Nil)

val people =
  sc.textFile("examples/src/main/resources/people.txt").map(
    _.split(",")).map(p => Row(p(0), p(1).trim.toInt))
val peopleSchemaRDD = sqlContext.applySchema(people, schema)
peopleSchemaRDD.printSchema
// root
// |-- name: string (nullable = false)
// |-- age: integer (nullable = true)

peopleSchemaRDD.registerTempTable("people")
sqlContext.sql("select name from people").collect.foreach(println)

Annotations: @DeveloperApi()

def approxCountDistinct(e: Expression, rsd: Double): ApproxCountDistinct

Definition Classes
ExpressionConversions
final def asInstanceOf[T0]: T0

Definition Classes
Any
def avg(e: Expression): Average

Definition Classes
ExpressionConversions
implicit def binaryToLiteral(a: Array[Byte]): Literal

Definition Classes
ExpressionConversions
implicit def booleanToLiteral(b: Boolean): Literal

Definition Classes
ExpressionConversions
implicit def byteToLiteral(b: Byte): Literal

Definition Classes
ExpressionConversions
def cacheTable(tableName: String): Unit

Caches the specified table in-memory.
lazy val catalog: Catalog

Attributes
protected[org.apache.spark.sql]
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
def count(e: Expression): Count

Definition Classes
ExpressionConversions
def countDistinct(e: Expression*): CountDistinct

Definition Classes
ExpressionConversions
def createParquetFile[A <: Product](path: String, allowExisting: Boolean = true, conf: Configuration = new Configuration())(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[A]): SchemaRDD

:: Experimental :: Creates an empty parquet file with the schema of class A, which can be registered as a table.
:: Experimental :: Creates an empty parquet file with the schema of class A, which can be registered as a table. This registered table can be used as the target of future insertInto operations.
```
val sqlContext = new SQLContext(...)
import sqlContext._

case class Person(name: String, age: Int)
createParquetFile[Person]("path/to/file.parquet").registerTempTable("people")
sql("INSERT INTO people SELECT 'michael', 29")
```
A
A case class type that describes the desired schema of the parquet file to be created.
path
The path where the directory containing parquet metadata should be created. Data inserted into this table will also be stored at this location.
allowExisting
When false, an exception will be thrown if this directory already exists.
conf
A Hadoop configuration object that can be used to specify options to the parquet output format.

Annotations
@Experimental()
implicit def createSchemaRDD[A <: Product](rdd: RDD[A])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[A]): SchemaRDD

Creates a SchemaRDD from an RDD of case classes.
implicit def decimalToLiteral(d: BigDecimal): Literal

Definition Classes
ExpressionConversions
implicit def doubleToLiteral(d: Double): Literal

Definition Classes
ExpressionConversions
lazy val emptyResult: RDD[Row]

Attributes
protected[org.apache.spark.sql]
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def executePlan(plan: LogicalPlan): QueryExecution

Attributes
protected[org.apache.spark.sql]
def executeSql(sql: String): QueryExecution

Attributes
protected[org.apache.spark.sql]
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
def first(e: Expression): First

Definition Classes
ExpressionConversions
implicit def floatToLiteral(f: Float): Literal

Definition Classes
ExpressionConversions
lazy val functionRegistry: FunctionRegistry

Attributes
protected[org.apache.spark.sql]
def getAllConfs: Map[String, String]

Return all the configuration properties that have been set (i.e. not the default).
Return all the configuration properties that have been set (i.e. not the default). This creates a new copy of the config properties in the form of a Map.

Definition Classes
SQLConf
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def getConf(key: String, defaultValue: String): String

Return the value of Spark SQL configuration property for the given key.
Return the value of Spark SQL configuration property for the given key. If the key is not set yet, return defaultValue.

Definition Classes
SQLConf
def getConf(key: String): String

Return the value of Spark SQL configuration property for the given key.
Return the value of Spark SQL configuration property for the given key.

Definition Classes
SQLConf
def hashCode(): Int

Definition Classes
AnyRef → Any
implicit def intToLiteral(i: Int): Literal

Definition Classes
ExpressionConversions
def isCached(tableName: String): Boolean

Returns true if the table is currently cached in-memory.
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
def isTraceEnabled(): Boolean

Attributes
protected
Definition Classes
Logging
def jsonFile(path: String, samplingRatio: Double): SchemaRDD

:: Experimental ::
:: Experimental ::

Annotations
@Experimental()
def jsonFile(path: String, schema: StructType): SchemaRDD

:: Experimental :: Loads a JSON file (one object per line) and applies the given schema, returning the result as a SchemaRDD.
:: Experimental :: Loads a JSON file (one object per line) and applies the given schema, returning the result as a SchemaRDD.

Annotations
@Experimental()
def jsonFile(path: String): SchemaRDD

Loads a JSON file (one object per line), returning the result as a SchemaRDD.
Loads a JSON file (one object per line), returning the result as a SchemaRDD. It goes through the entire dataset once to determine the schema.
def jsonRDD(json: RDD[String], samplingRatio: Double): SchemaRDD

:: Experimental ::
:: Experimental ::

Annotations
@Experimental()
def jsonRDD(json: RDD[String], schema: StructType): SchemaRDD

:: Experimental :: Loads an RDD[String] storing JSON objects (one object per record) and applies the given schema, returning the result as a SchemaRDD.
:: Experimental :: Loads an RDD[String] storing JSON objects (one object per record) and applies the given schema, returning the result as a SchemaRDD.

Annotations
@Experimental()
def jsonRDD(json: RDD[String]): SchemaRDD

Loads an RDD[String] storing JSON objects (one object per record), returning the result as a SchemaRDD.
Loads an RDD[String] storing JSON objects (one object per record), returning the result as a SchemaRDD. It goes through the entire dataset once to determine the schema.
def log: Logger

Attributes
protected
Definition Classes
Logging
def logDebug(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logDebug(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logError(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logError(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logInfo(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logInfo(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logName: String

Attributes
protected
Definition Classes
Logging
def logTrace(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logTrace(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logWarning(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logWarning(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
implicit def logicalPlanToSparkQuery(plan: LogicalPlan): SchemaRDD

:: DeveloperApi :: Allows catalyst LogicalPlans to be executed as a SchemaRDD.
:: DeveloperApi :: Allows catalyst LogicalPlans to be executed as a SchemaRDD. Note that the LogicalPlan interface is considered internal, and thus not guaranteed to be stable. As a result, using them directly is not recommended.

Annotations
@DeveloperApi()
implicit def longToLiteral(l: Long): Literal

Definition Classes
ExpressionConversions
def lower(e: Expression): Lower

Definition Classes
ExpressionConversions
def max(e: Expression): Max

Definition Classes
ExpressionConversions
def min(e: Expression): Min

Definition Classes
ExpressionConversions
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
val optimizer: Optimizer.type

Attributes
protected[org.apache.spark.sql]
def parquetFile(path: String): SchemaRDD

Loads a Parquet file, returning the result as a SchemaRDD.
def parseSql(sql: String): LogicalPlan

Attributes
protected[org.apache.spark.sql]
val parser: SqlParser

Attributes
protected[org.apache.spark.sql]
val planner: SparkPlanner

Attributes
protected[org.apache.spark.sql]
val prepareForExecution: RuleExecutor[SparkPlan] { val batches: List[this.Batch] }

Prepares a planned SparkPlan for execution by inserting shuffle operations as needed.
Prepares a planned SparkPlan for execution by inserting shuffle operations as needed.

Attributes
protected[org.apache.spark.sql]
def registerFunction[T](name: String, func: Function22[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Unit

Definition Classes
UDFRegistration
def registerFunction[T](name: String, func: Function21[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Unit

Definition Classes
UDFRegistration
def registerFunction[T](name: String, func: Function20[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Unit

Definition Classes
UDFRegistration
def registerFunction[T](name: String, func: Function19[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Unit

Definition Classes
UDFRegistration
def registerFunction[T](name: String, func: Function18[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Unit

Definition Classes
UDFRegistration
def registerFunction[T](name: String, func: Function17[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Unit

Definition Classes
UDFRegistration
def registerFunction[T](name: String, func: Function16[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Unit

Definition Classes
UDFRegistration
def registerFunction[T](name: String, func: Function15[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Unit

Definition Classes
UDFRegistration
def registerFunction[T](name: String, func: Function14[_, _, _, _, _, _, _, _, _, _, _, _, _, _, T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Unit

Definition Classes
UDFRegistration
def registerFunction[T](name: String, func: Function13[_, _, _, _, _, _, _, _, _, _, _, _, _, T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Unit

Definition Classes
UDFRegistration
def registerFunction[T](name: String, func: Function12[_, _, _, _, _, _, _, _, _, _, _, _, T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Unit

Definition Classes
UDFRegistration
def registerFunction[T](name: String, func: Function11[_, _, _, _, _, _, _, _, _, _, _, T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Unit

Definition Classes
UDFRegistration
def registerFunction[T](name: String, func: Function10[_, _, _, _, _, _, _, _, _, _, T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Unit

Definition Classes
UDFRegistration
def registerFunction[T](name: String, func: Function9[_, _, _, _, _, _, _, _, _, T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Unit

Definition Classes
UDFRegistration
def registerFunction[T](name: String, func: Function8[_, _, _, _, _, _, _, _, T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Unit

Definition Classes
UDFRegistration
def registerFunction[T](name: String, func: Function7[_, _, _, _, _, _, _, T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Unit

Definition Classes
UDFRegistration
def registerFunction[T](name: String, func: Function6[_, _, _, _, _, _, T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Unit

Definition Classes
UDFRegistration
def registerFunction[T](name: String, func: Function5[_, _, _, _, _, T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Unit

Definition Classes
UDFRegistration
def registerFunction[T](name: String, func: Function4[_, _, _, _, T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Unit

Definition Classes
UDFRegistration
def registerFunction[T](name: String, func: Function3[_, _, _, T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Unit

Definition Classes
UDFRegistration
def registerFunction[T](name: String, func: Function2[_, _, T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Unit

Definition Classes
UDFRegistration
def registerFunction[T](name: String, func: Function1[_, T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Unit

registerFunction 1-22 were generated by this script
registerFunction 1-22 were generated by this script
(1 to 22).map { x =>
  val types = (1 to x).map(x => "_").reduce(_ + ", " + _)
  s"""
    def registerFunction[T: TypeTag](name: String, func: Function$x[$types, T]): Unit = {
      def builder(e: Seq[Expression]) =
        ScalaUdf(func, ScalaReflection.schemaFor(typeTag[T]).dataType, e)
      functionRegistry.registerFunction(name, builder)
    }
  """
}

Definition Classes
UDFRegistration
def registerRDDAsTable(rdd: SchemaRDD, tableName: String): Unit

Registers the given RDD as a temporary table in the catalog.
Registers the given RDD as a temporary table in the catalog. Temporary tables exist only during the lifetime of this instance of SQLContext.
def setConf(key: String, value: String): Unit

Set the given Spark SQL configuration property.
Set the given Spark SQL configuration property.

Definition Classes
SQLConf
def setConf(props: Properties): Unit

Set Spark SQL configuration properties.
Set Spark SQL configuration properties.

Definition Classes
SQLConf
val settings: Map[String, String]

Only a low degree of contention is expected for conf, thus NOT using ConcurrentHashMap.
Only a low degree of contention is expected for conf, thus NOT using ConcurrentHashMap.

Attributes
protected[org.apache.spark]
Definition Classes
SQLConf
implicit def shortToLiteral(s: Short): Literal

Definition Classes
ExpressionConversions
val sparkContext: SparkContext
def sql(sqlText: String): SchemaRDD

Executes a SQL query using Spark, returning the result as a SchemaRDD.
Executes a SQL query using Spark, returning the result as a SchemaRDD. The dialect that is used for SQL parsing can be configured with 'spark.sql.dialect'.
implicit def stringToLiteral(s: String): Literal

Definition Classes
ExpressionConversions
def sum(e: Expression): Sum

Definition Classes
ExpressionConversions
def sumDistinct(e: Expression): SumDistinct

Definition Classes
ExpressionConversions
implicit def symbolToUnresolvedAttribute(s: Symbol): UnresolvedAttribute

Definition Classes
ExpressionConversions
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def table(tableName: String): SchemaRDD

Returns the specified table as a SchemaRDD.
implicit def timestampToLiteral(t: Timestamp): Literal

Definition Classes
ExpressionConversions
def toString(): String

Definition Classes
AnyRef → Any
def uncacheTable(tableName: String): Unit

Removes the specified table from the in-memory cache.
def upper(e: Expression): Upper

Definition Classes
ExpressionConversions
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )

class SQLContext extends Logging with SQLConf with ExpressionConversions with UDFRegistration with Serializable

Instance Constructors

new SQLContext(sparkContext: SparkContext)

Type Members

implicit class DslAttribute extends AnyRef

implicit class DslExpression extends ImplicitOperators

implicit class DslString extends ImplicitOperators

implicit class DslSymbol extends ImplicitAttribute

abstract class ImplicitAttribute extends ImplicitOperators

abstract class QueryExecution extends AnyRef

class SparkPlanner extends SparkStrategies

Value Members

final def !=(arg0: AnyRef): Boolean

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: AnyRef): Boolean

final def ==(arg0: Any): Boolean

lazy val analyzer: Analyzer

def applySchema(rowRDD: RDD[Row], schema: StructType): SchemaRDD

def approxCountDistinct(e: Expression, rsd: Double): ApproxCountDistinct

final def asInstanceOf[T0]: T0

def avg(e: Expression): Average

implicit def binaryToLiteral(a: Array[Byte]): Literal

implicit def booleanToLiteral(b: Boolean): Literal

implicit def byteToLiteral(b: Byte): Literal

def cacheTable(tableName: String): Unit

lazy val catalog: Catalog

def clone(): AnyRef

def count(e: Expression): Count

def countDistinct(e: Expression*): CountDistinct

def createParquetFile[A <: Product](path: String, allowExisting: Boolean = true, conf: Configuration = new Configuration())(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[A]): SchemaRDD

implicit def createSchemaRDD[A <: Product](rdd: RDD[A])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[A]): SchemaRDD

implicit def decimalToLiteral(d: BigDecimal): Literal

implicit def doubleToLiteral(d: Double): Literal

lazy val emptyResult: RDD[Row]

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def executePlan(plan: LogicalPlan): QueryExecution

def executeSql(sql: String): QueryExecution

def finalize(): Unit

def first(e: Expression): First

implicit def floatToLiteral(f: Float): Literal

lazy val functionRegistry: FunctionRegistry

def getAllConfs: Map[String, String]

final def getClass(): Class[_]

def getConf(key: String, defaultValue: String): String

def getConf(key: String): String

def hashCode(): Int

implicit def intToLiteral(i: Int): Literal

def isCached(tableName: String): Boolean

final def isInstanceOf[T0]: Boolean

def isTraceEnabled(): Boolean

def jsonFile(path: String, samplingRatio: Double): SchemaRDD

def jsonFile(path: String, schema: StructType): SchemaRDD

def jsonFile(path: String): SchemaRDD

def jsonRDD(json: RDD[String], samplingRatio: Double): SchemaRDD

def jsonRDD(json: RDD[String], schema: StructType): SchemaRDD

def jsonRDD(json: RDD[String]): SchemaRDD

def log: Logger

def logDebug(msg: ⇒ String, throwable: Throwable): Unit

def logDebug(msg: ⇒ String): Unit

def logError(msg: ⇒ String, throwable: Throwable): Unit

def logError(msg: ⇒ String): Unit

def logInfo(msg: ⇒ String, throwable: Throwable): Unit

def logInfo(msg: ⇒ String): Unit

def logName: String

def logTrace(msg: ⇒ String, throwable: Throwable): Unit

def logTrace(msg: ⇒ String): Unit

def logWarning(msg: ⇒ String, throwable: Throwable): Unit

def logWarning(msg: ⇒ String): Unit

implicit def logicalPlanToSparkQuery(plan: LogicalPlan): SchemaRDD

implicit def longToLiteral(l: Long): Literal

def lower(e: Expression): Lower

def max(e: Expression): Max

def min(e: Expression): Min

final def ne(arg0: AnyRef): Boolean

final def notify(): Unit

final def notifyAll(): Unit

val optimizer: Optimizer.type