diff --git a/bin/ci b/bin/ci
index 67b431062dc3f0b0e5d387c815352321ec900c3a..91372013e50ddadea9abd69f8385f7b5fc7985c6 100755
--- a/bin/ci
+++ b/bin/ci
@@ -129,6 +129,10 @@ install-presto() {
 }
 
 install-sparksql() {
+    # first, download the Spark Deps JAR and put it in the plugins/ dir
+    wget --output-document=plugins/spark-deps.jar https://s3.amazonaws.com/sparksql-deps/metabase-sparksql-deps-1.2.1.spark2-standalone.jar
+
+    # next, download Spark and run it
     spark_version='2.1.1' # Java 7 support was removed in Spark 2.2 so don't upgrade until we upgrade CI
     hadoop_version='2.7'
diff --git a/docs/administration-guide/01-managing-databases.md b/docs/administration-guide/01-managing-databases.md
index fd6a932cbcdad990ed1fd2afd5c4f475ec40f87b..34a25b2035e0fd39da0aba6d9c33d0efafe0180c 100644
--- a/docs/administration-guide/01-managing-databases.md
+++ b/docs/administration-guide/01-managing-databases.md
@@ -24,6 +24,7 @@ Now you’ll see a list of your databases. To connect another database to Metaba
 * [Vertica](databases/vertica.md)
 * Presto
 * Google Analytics
+* [SparkSQL](databases/spark.md)
 
 To add a database, you'll need its connection information.
diff --git a/docs/administration-guide/databases/spark.md b/docs/administration-guide/databases/spark.md
new file mode 100644
index 0000000000000000000000000000000000000000..66990e00b6ebeb073f205f82ca539d7748f702e5
--- /dev/null
+++ b/docs/administration-guide/databases/spark.md
@@ -0,0 +1,50 @@
+## Working with SparkSQL in Metabase
+
+Starting in v0.29.0, Metabase provides a driver for connecting to SparkSQL databases. Under the hood, Metabase uses SparkSQL's
+JDBC driver and other dependencies; due to the sheer size of these dependencies, we can't include them as part of Metabase.
+Luckily, downloading them yourself and making them available to Metabase is straightforward and only takes a few minutes.
+
+### Downloading the SparkSQL JDBC Driver JAR
+
+You can download the required dependencies [here](https://s3.amazonaws.com/sparksql-deps/metabase-sparksql-deps-1.2.1.spark2-standalone.jar).
+
+### Adding the SparkSQL JDBC Driver JAR to the Metabase Plugins Directory
+
+Metabase will automatically make the SparkSQL driver available if it finds the SparkSQL dependencies JAR in the Metabase plugins
+directory when it starts up. All you need to do is create the directory, move the JAR you just downloaded into it, and restart
+Metabase.
+
+By default, the plugins directory is called `plugins`, and lives in the same directory as the Metabase JAR.
+
+For example, if you're running Metabase from a directory called `/app/`, you should move the SparkSQL dependencies JAR to
+`/app/plugins/`:
+
+```bash
+# example directory structure for running Metabase with SparkSQL support
+/app/metabase.jar
+/app/plugins/metabase-sparksql-deps-1.2.1.spark2-standalone.jar
+```
+
+If you're running Metabase from the Mac App, the plugins directory defaults to `~/Library/Application Support/Metabase/Plugins/`:
+
+```bash
+# example directory structure for running the Metabase Mac App with SparkSQL support
+/Users/camsaul/Library/Application Support/Metabase/Plugins/metabase-sparksql-deps-1.2.1.spark2-standalone.jar
+```
+
+Finally, if the default location doesn't suit your needs, you can choose a custom plugins directory by setting the
+environment variable `MB_PLUGINS_DIR`.
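+
+For example, here's a minimal sketch of pointing Metabase at a custom plugins directory (the path is just an illustration):
+
+```bash
+# keep plugins in /data/metabase-plugins instead of the default ./plugins
+export MB_PLUGINS_DIR=/data/metabase-plugins
+java -jar metabase.jar
+```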
+
+### Enabling Plugins on Java 9
+
+For security reasons, Java 9 disables dynamically adding JARs to the Java classpath by default. For the time being, we
+recommend you run Metabase with Java 8 when using the SparkSQL driver.
+
+You may be able to get Java 9 to work by passing an extra JVM option:
+
+```bash
+java --add-opens=java.base/java.net=ALL-UNNAMED -jar metabase.jar
+```
+
+The default Docker images already include this option.
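+
+If you run Metabase via Docker, one possible setup (a sketch; the host path and container mount point here are examples,
+not officially documented defaults) is to mount a directory containing the dependencies JAR and point `MB_PLUGINS_DIR` at it:
+
+```bash
+docker run -d -p 3000:3000 \
+  -v /path/to/plugins:/app/plugins \
+  -e MB_PLUGINS_DIR=/app/plugins \
+  metabase/metabase
+```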
"Enabling read-only mode is not supported")))) diff --git a/src/metabase/driver/FixedHiveDriver.clj b/src/metabase/driver/FixedHiveDriver.clj deleted file mode 100644 index b477ab10bb0b5dae26ed7a59346d17b99b718a4d..0000000000000000000000000000000000000000 --- a/src/metabase/driver/FixedHiveDriver.clj +++ /dev/null @@ -1,19 +0,0 @@ -(ns metabase.driver.FixedHiveDriver - (:import [org.apache.hive.jdbc HiveDriver] - java.util.Properties) - (:gen-class - :extends org.apache.hive.jdbc.HiveDriver - :init init - :prefix "driver-" - :constructors {[] []})) - -(defn driver-init - "Initializes the Hive driver, fixed to work with Metabase" - [] - [[] nil]) - -(defn driver-connect - "Connects to a Hive compatible database" - [^org.apache.hive.jdbc.HiveDriver this ^String url ^java.util.Properties info] - (when (.acceptsURL this url) - (clojure.lang.Reflector/invokeConstructor (Class/forName "metabase.driver.FixedHiveConnection") (to-array [url info])))) diff --git a/src/metabase/driver/sparksql.clj b/src/metabase/driver/sparksql.clj index 419dba5a3ba5e0d1fb82a810bba656cc18c16047..fd281834fb76d526035ef4a54f3f3bfdd8d8f0cd 100644 --- a/src/metabase/driver/sparksql.clj +++ b/src/metabase/driver/sparksql.clj @@ -3,6 +3,7 @@ [set :as set] [string :as s]] [clojure.java.jdbc :as jdbc] + [clojure.tools.logging :as log] [honeysql [core :as hsql] [helpers :as h]] @@ -15,7 +16,8 @@ [hive-like :as hive-like]] [metabase.driver.generic-sql.query-processor :as sqlqp] [metabase.query-processor.util :as qputil] - [metabase.util.honeysql-extensions :as hx]) + [metabase.util.honeysql-extensions :as hx] + [puppetlabs.i18n.core :refer [trs]]) (:import clojure.lang.Reflector java.sql.DriverManager metabase.query_processor.interface.Field)) @@ -94,23 +96,6 @@ [{:keys [host port db jdbc-flags] :or {host "localhost", port 10000, db "", jdbc-flags ""} :as opts}] - ;; manually register our FixedHiveDriver with java.sql.DriverManager and make sure it's the only driver returned for - ;; jdbc:hive2, since we do not want to use the driver registered by the super class of our FixedHiveDriver. - ;; - ;; Class/forName and invokeConstructor is required to make this compile, but it may be possible to solve this with - ;; the right project.clj magic - (DriverManager/registerDriver - (Reflector/invokeConstructor - (Class/forName "metabase.driver.FixedHiveDriver") - (into-array []))) - (loop [] - (when-let [driver (try - (DriverManager/getDriver "jdbc:hive2://localhost:10000") - (catch java.sql.SQLException _ - nil))] - (when-not (instance? (Class/forName "metabase.driver.FixedHiveDriver") driver) - (DriverManager/deregisterDriver driver) - (recur)))) (merge {:classname "metabase.driver.FixedHiveDriver" :subprotocol "hive2" :subname (str "//" host ":" port "/" db jdbc-flags)} @@ -223,7 +208,39 @@ :string-length-fn (u/drop-first-arg hive-like/string-length-fn) :unix-timestamp->timestamp (u/drop-first-arg hive-like/unix-timestamp->timestamp)})) +(defn- register-hive-jdbc-driver! [& {:keys [remaining-tries], :or {remaining-tries 5}}] + ;; manually register our FixedHiveDriver with java.sql.DriverManager + (DriverManager/registerDriver + (Reflector/invokeConstructor + (Class/forName "metabase.driver.FixedHiveDriver") + (into-array []))) + ;; now make sure it's the only driver returned + ;; for jdbc:hive2, since we do not want to use the driver registered by the super class of our FixedHiveDriver. + (when-let [driver (u/ignore-exceptions + (DriverManager/getDriver "jdbc:hive2://localhost:10000"))] + (let [registered? (instance? 
   (merge {:classname   "metabase.driver.FixedHiveDriver"
           :subprotocol "hive2"
           :subname     (str "//" host ":" port "/" db jdbc-flags)}
@@ -223,7 +208,39 @@
    :string-length-fn          (u/drop-first-arg hive-like/string-length-fn)
    :unix-timestamp->timestamp (u/drop-first-arg hive-like/unix-timestamp->timestamp)}))
 
+(defn- register-hive-jdbc-driver! [& {:keys [remaining-tries], :or {remaining-tries 5}}]
+  ;; manually register our FixedHiveDriver with java.sql.DriverManager
+  (DriverManager/registerDriver
+   (Reflector/invokeConstructor
+    (Class/forName "metabase.driver.FixedHiveDriver")
+    (into-array [])))
+  ;; now make sure it's the only driver returned for jdbc:hive2, since we do not want to use the driver
+  ;; registered by the super class of our FixedHiveDriver
+  (when-let [driver (u/ignore-exceptions
+                      (DriverManager/getDriver "jdbc:hive2://localhost:10000"))]
+    (let [registered? (instance? (Class/forName "metabase.driver.FixedHiveDriver") driver)]
+      (cond
+        registered?
+        true
+
+        ;; if it's not the registered driver, deregister the current driver and try a couple more times
+        ;; before giving up :(
+        (and (not registered?)
+             (> remaining-tries 0))
+        (do
+          (DriverManager/deregisterDriver driver)
+          (recur {:remaining-tries (dec remaining-tries)}))
+
+        :else
+        (log/error
+         (trs "Error: metabase.driver.FixedHiveDriver is registered, but JDBC does not seem to be using it."))))))
+
 (defn -init-driver
-  "Register the SparkSQL driver."
+  "Register the SparkSQL driver if the SparkSQL dependencies are available."
   []
-  (driver/register-driver! :sparksql (SparkSQLDriver.)))
+  (when (u/ignore-exceptions (Class/forName "metabase.driver.FixedHiveDriver"))
+    (log/info (trs "Found metabase.driver.FixedHiveDriver."))
+    (when (u/ignore-exceptions (register-hive-jdbc-driver!))
+      (log/info (trs "Successfully registered metabase.driver.FixedHiveDriver with JDBC."))
+      (driver/register-driver! :sparksql (SparkSQLDriver.)))))
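
With the plugin JAR in place, the `-init-driver` changes above log their progress at startup, so a quick smoke test
(a sketch, assuming you launch Metabase from a shell) is to watch for the two registration messages emitted on success:

```bash
java -jar metabase.jar 2>&1 | grep FixedHiveDriver
# expected output on success (the log messages from -init-driver above):
#   Found metabase.driver.FixedHiveDriver.
#   Successfully registered metabase.driver.FixedHiveDriver with JDBC.
```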