Sunday, 19 June 2016

Spark-shell: <console>:10: error: not found: value sqlContext on Windows




WARN Connection: BoneCP specified but not present in CLASSPATH (or one of dependencies)
WARN Connection: BoneCP specified but not present in CLASSPATH (or one of dependencies)
WARN ObjectStore: Version information not found in metastore. hive.metastore.schema.verification is not enabled so recording the schema version 1.2.0
WARN ObjectStore: Failed to get database default, returning NoSuchObjectException
WARN : Your hostname, DESKTOP-8JS2RD5 resolves to a loopback/non-reachable address: fe80:0:0:0:0:5efe:c0a8:103%net1, but we couldn't find any external IP address!
java.lang.RuntimeException: java.lang.NullPointerException
        at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:522)
        at org.apache.spark.sql.hive.client.ClientWrapper.<init>(ClientWrapper.scala:171)
        at org.apache.spark.sql.hive.HiveContext.executionHive$lzycompute(HiveContext.scala:163)
        at org.apache.spark.sql.hive.HiveContext.executionHive(HiveContext.scala:161)
        at org.apache.spark.sql.hive.HiveContext.<init>(HiveContext.scala:168)
        at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
        at sun.reflect.NativeConstructorAccessorImpl.newInstance(Unknown Source)
        at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(Unknown Source)
    at java.lang.reflect.Constructor.newInstance(Unknown Source)
    at org.apache.spark.repl.SparkILoop.createSQLContext(SparkILoop.scala:1028)
    at $iwC$$iwC.<init>(<console>:9)
    at $iwC.<init>(<console>:18)
    at <init>(<console>:20)
    at .<init>(<console>:24)
    at .<clinit>(<console>)
    at .<init>(<console>:7)
    at .<clinit>(<console>)
    at $print(<console>)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
    at java.lang.reflect.Method.invoke(Unknown Source)
    at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
    at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1340)
    at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
    at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
    at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
    at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
    at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
    at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
    at org.apache.spark.repl.SparkILoopInit$$anonfun$initializeSpark$1.apply(SparkILoopInit.scala:132)
    at org.apache.spark.repl.SparkILoopInit$$anonfun$initializeSpark$1.apply(SparkILoopInit.scala:124)
    at org.apache.spark.repl.SparkIMain.beQuietDuring(SparkIMain.scala:324)
    at org.apache.spark.repl.SparkILoopInit$class.initializeSpark(SparkILoopInit.scala:124)
    at org.apache.spark.repl.SparkILoop.initializeSpark(SparkILoop.scala:64)
    at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1$$anonfun$apply$mcZ$sp$5.apply$mcV$sp(SparkILoop.scala:974)

          at org.apache.spark.repl.SparkILoopInit$class.runThunks(SparkILoopInit.scala:159)
          at org.apache.spark.repl.SparkILoop.runThunks(SparkILoop.scala:64)
          at org.apache.spark.repl.SparkILoopInit$class.postInitialization(SparkILoopInit.scala:108)
          at org.apache.spark.repl.SparkILoop.postInitialization(SparkILoop.scala:64)
          at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:991)
          at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
          at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
          at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
          at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
          at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
          at org.apache.spark.repl.Main$.main(Main.scala:31)
          at org.apache.spark.repl.Main.main(Main.scala)
          at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
          at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
          at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
          at java.lang.reflect.Method.invoke(Unknown Source)
    at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:672)
    at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
    at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
  Caused by: java.lang.NullPointerException
    at java.lang.ProcessBuilder.start(Unknown Source)
    at org.apache.hadoop.util.Shell.runCommand(Shell.java:445)
    at org.apache.hadoop.util.Shell.run(Shell.java:418)
    at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:650)
    at org.apache.hadoop.util.Shell.execCommand(Shell.java:739)
    at org.apache.hadoop.util.Shell.execCommand(Shell.java:722)
    at org.apache.hadoop.fs.FileUtil.execCommand(FileUtil.java:1097)
    at org.apache.hadoop.fs.RawLocalFileSystem$DeprecatedRawLocalFileStatus.loadPermissionInfo(RawLocalFileSystem.java:559)
    at org.apache.hadoop.fs.RawLocalFileSystem$DeprecatedRawLocalFileStatus.getPermission(RawLocalFileSystem.java:534)
    at org.apache.hadoop.hive.ql.session.SessionState.createRootHDFSDir(SessionState.java:599)
    at org.apache.hadoop.hive.ql.session.SessionState.createSessionDirs(SessionState.java:554)
    at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:508)
    ... 56 more

  <console>:10: error: not found: value sqlContext
               import sqlContext.implicits._
                ^
  <console>:10: error: not found: value sqlContext
         import sqlContext.sql
          ^

  scala>

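Fix:
  The NullPointerException is thrown while Hadoop tries to run an external command to read the permissions of the Hive scratch directory (/tmp/hive); on Windows that command is winutils.exe, which is not installed by default.
  1) Download winutils.exe from https://github.com/steveloughran/winutils/tree/master/hadoop-2.6.0/bin and copy it to a folder such as C:\Hadoop\bin. Set the HADOOP_HOME environment variable to C:\Hadoop.
  2) Open a command prompt as administrator and run: C:\Hadoop\bin\winutils.exe chmod 777 /tmp/hive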

Result:

Friday, 17 June 2016

Spark: java.sql.SQLException: No suitable driver


Retrieving data from MySQL through the SQL context fails with the error message shown below.


1) Invoke spark-shell.
2) Run the code below in the SQL context:

import org.apache.spark.sql.SQLContext
val sqlContext = new SQLContext(sc)
val url = "jdbc:mysql://localhost:3306/retail_db?user=root&password=root"
sqlContext.load("jdbc", Map("url" -> url,"dbtable" -> "departments")).collect().foreach(println)

3) The following error is shown at the command prompt:
scala> sqlContext.load("jdbc", Map("url" -> url,"dbtable" -> "departments")).collect().foreach(println)
warning: there were 1 deprecation warning(s); re-run with -deprecation for details
java.sql.SQLException: No suitable driver
        at java.sql.DriverManager.getDriver(DriverManager.java:278)
        at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$2.apply(JdbcUtils.scala:50)
        at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$2.apply(JdbcUtils.scala:50)
        at scala.Option.getOrElse(Option.scala:120)
        at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.createConnectionFactory(JdbcUtils.scala:49)
        at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD$.resolveTable(JDBCRDD.scala:120)
        at org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation.<init>(JDBCRelation.scala:91)
        at org.apache.spark.sql.execution.datasources.jdbc.DefaultSource.createRelation(DefaultSource.scala:57)
        at org.apache.spark.sql.execution.datasources.ResolvedDataSource$.apply(ResolvedDataSource.scala:158)

Fix:
Start the shell with the MySQL connector JAR on the driver class path, using the command below:

spark-shell --driver-class-path /usr/local/hadoop/mysql-connector-java-5.1.39/mysql-connector-java-5.1.39-bin.jar

Then run the same code as before:
scala> import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.SQLContext

scala> val url = "jdbc:mysql://localhost:3306/retail_db?user=root&password=root"
url: String = jdbc:mysql://localhost:3306/retail_db?user=root&password=root

scala> sqlContext.load("jdbc", Map("url" -> url,"dbtable" -> "departments")).collect().foreach(println)
warning: there were 1 deprecation warning(s); re-run with -deprecation for details

Result: 
[2,Fitness]
[3,Footwear]
[4,Apparel]
[5,Golf]
[6,Outdoors]
[7,Fan Shop]