From c62bcf9ce154e49805ed02f92535ebe0c1d37b28 Mon Sep 17 00:00:00 2001 From: Cam Saul <cammsaul@gmail.com> Date: Thu, 10 May 2018 15:17:07 -0700 Subject: [PATCH] Separate SparkSQL dependencies [ci drivers] --- bin/ci | 4 ++ project.clj | 15 +----- src/metabase/driver/FixedHiveConnection.clj | 26 ---------- src/metabase/driver/FixedHiveDriver.clj | 19 ------- src/metabase/driver/sparksql.clj | 57 +++++++++++++-------- 5 files changed, 42 insertions(+), 79 deletions(-) delete mode 100644 src/metabase/driver/FixedHiveConnection.clj delete mode 100644 src/metabase/driver/FixedHiveDriver.clj diff --git a/bin/ci b/bin/ci index 67b431062dc..91372013e50 100755 --- a/bin/ci +++ b/bin/ci @@ -129,6 +129,10 @@ install-presto() { } install-sparksql() { + # first, download the Spark Deps JAR and put it in the plugins/ dir + wget --output-document=plugins/spark-deps.jar https://s3.amazonaws.com/sparksql-deps/metabase-sparksql-deps-1.2.1.spark2-standalone.jar + + # next, download Spark and run it spark_version='2.1.1' # Java 7 support was removed in Spark 2.2 so don't upgrade until we upgrade CI hadoop_version='2.7' diff --git a/project.clj b/project.clj index edcfda71742..ac7b582ad9a 100644 --- a/project.clj +++ b/project.clj @@ -92,16 +92,6 @@ [org.liquibase/liquibase-core "3.5.3"] ; migration management (Java lib) [org.postgresql/postgresql "42.1.4.jre7"] ; Postgres driver [org.slf4j/slf4j-log4j12 "1.7.25"] ; abstraction for logging frameworks -- allows end user to plug in desired logging framework at deployment time - [org.spark-project.hive/hive-jdbc "1.2.1.spark2" ; JDBC Driver for Apache Spark - :exclusions [org.apache.curator/curator-framework - org.apache.curator/curator-recipes - org.apache.thrift/libfb303 - org.apache.zookeeper/zookeeper - org.eclipse.jetty.aggregate/jetty-all - org.spark-project.hive/hive-common - org.spark-project.hive/hive-metastore - org.spark-project.hive/hive-serde - org.spark-project.hive/hive-shims]] [org.tcrawley/dynapath "0.2.5"] ; Dynamically 
add Jars (e.g. Oracle or Vertica) to classpath [org.xerial/sqlite-jdbc "3.21.0.1"] ; SQLite driver [org.yaml/snakeyaml "1.18"] ; YAML parser (required by liquibase) @@ -164,10 +154,7 @@ :env {:mb-run-mode "dev"} :jvm-opts ["-Dlogfile.path=target/log"] ;; Log appender class needs to be compiled for log4j to use it, - ;; classes for fixed Hive driver in must be compiled for tests - :aot [metabase.logger - metabase.driver.FixedHiveConnection - metabase.driver.FixedHiveDriver]} + :aot [metabase.logger]} :ci {:jvm-opts ["-Xmx3g"]} :reflection-warnings {:global-vars {*warn-on-reflection* true}} ; run `lein check-reflection-warnings` to check for reflection warnings :expectations {:injections [(require 'metabase.test-setup ; for test setup stuff diff --git a/src/metabase/driver/FixedHiveConnection.clj b/src/metabase/driver/FixedHiveConnection.clj deleted file mode 100644 index 6a06958e775..00000000000 --- a/src/metabase/driver/FixedHiveConnection.clj +++ /dev/null @@ -1,26 +0,0 @@ -(ns metabase.driver.FixedHiveConnection - (:import [org.apache.hive.jdbc HiveConnection] - [java.sql ResultSet SQLException] - java.util.Properties) - (:gen-class - :extends org.apache.hive.jdbc.HiveConnection - :init init - :constructors {[String java.util.Properties] [String java.util.Properties]})) - -(defn -init - "Initializes the connection" - [uri properties] - [[uri properties] nil]) - -(defn -getHoldability - "Returns the holdability setting for this JDBC driver" - [^org.apache.hive.jdbc.HiveConnection this] - ResultSet/CLOSE_CURSORS_AT_COMMIT) - -(defn -setReadOnly - "Sets this connection to read only" - [^org.apache.hive.jdbc.HiveConnection this read-only?] - (when (.isClosed this) - (throw (SQLException. "Connection is closed"))) - (when read-only? - (throw (SQLException. 
"Enabling read-only mode is not supported")))) diff --git a/src/metabase/driver/FixedHiveDriver.clj b/src/metabase/driver/FixedHiveDriver.clj deleted file mode 100644 index b477ab10bb0..00000000000 --- a/src/metabase/driver/FixedHiveDriver.clj +++ /dev/null @@ -1,19 +0,0 @@ -(ns metabase.driver.FixedHiveDriver - (:import [org.apache.hive.jdbc HiveDriver] - java.util.Properties) - (:gen-class - :extends org.apache.hive.jdbc.HiveDriver - :init init - :prefix "driver-" - :constructors {[] []})) - -(defn driver-init - "Initializes the Hive driver, fixed to work with Metabase" - [] - [[] nil]) - -(defn driver-connect - "Connects to a Hive compatible database" - [^org.apache.hive.jdbc.HiveDriver this ^String url ^java.util.Properties info] - (when (.acceptsURL this url) - (clojure.lang.Reflector/invokeConstructor (Class/forName "metabase.driver.FixedHiveConnection") (to-array [url info])))) diff --git a/src/metabase/driver/sparksql.clj b/src/metabase/driver/sparksql.clj index 419dba5a3ba..fd281834fb7 100644 --- a/src/metabase/driver/sparksql.clj +++ b/src/metabase/driver/sparksql.clj @@ -3,6 +3,7 @@ [set :as set] [string :as s]] [clojure.java.jdbc :as jdbc] + [clojure.tools.logging :as log] [honeysql [core :as hsql] [helpers :as h]] @@ -15,7 +16,8 @@ [hive-like :as hive-like]] [metabase.driver.generic-sql.query-processor :as sqlqp] [metabase.query-processor.util :as qputil] - [metabase.util.honeysql-extensions :as hx]) + [metabase.util.honeysql-extensions :as hx] + [puppetlabs.i18n.core :refer [trs]]) (:import clojure.lang.Reflector java.sql.DriverManager metabase.query_processor.interface.Field)) @@ -94,23 +96,6 @@ [{:keys [host port db jdbc-flags] :or {host "localhost", port 10000, db "", jdbc-flags ""} :as opts}] - ;; manually register our FixedHiveDriver with java.sql.DriverManager and make sure it's the only driver returned for - ;; jdbc:hive2, since we do not want to use the driver registered by the super class of our FixedHiveDriver. 
-  ;;
-  ;; Class/forName and invokeConstructor is required to make this compile, but it may be possible to solve this with
-  ;; the right project.clj magic
-  (DriverManager/registerDriver
-   (Reflector/invokeConstructor
-    (Class/forName "metabase.driver.FixedHiveDriver")
-    (into-array [])))
-  (loop []
-    (when-let [driver (try
-                        (DriverManager/getDriver "jdbc:hive2://localhost:10000")
-                        (catch java.sql.SQLException _
-                          nil))]
-      (when-not (instance? (Class/forName "metabase.driver.FixedHiveDriver") driver)
-        (DriverManager/deregisterDriver driver)
-        (recur))))
   (merge {:classname "metabase.driver.FixedHiveDriver"
           :subprotocol "hive2"
           :subname (str "//" host ":" port "/" db jdbc-flags)}
@@ -223,7 +208,39 @@
    :string-length-fn (u/drop-first-arg hive-like/string-length-fn)
    :unix-timestamp->timestamp (u/drop-first-arg hive-like/unix-timestamp->timestamp)}))
 
+(defn- register-hive-jdbc-driver!
+  "Register `metabase.driver.FixedHiveDriver` with `java.sql.DriverManager` and make sure it is the driver returned
+  for `jdbc:hive2` URLs, deregistering any other driver that answers for them (at most `remaining-tries` times).
+  Returns `true` on success; otherwise logs an error and returns `nil`. Throws if the class cannot be loaded."
+  [& {:keys [remaining-tries], :or {remaining-tries 5}}]
+  (let [fixed-driver-class (Class/forName "metabase.driver.FixedHiveDriver")]
+    ;; register a single instance up front so the retry loop below does not construct and register a fresh
+    ;; FixedHiveDriver on every pass
+    (DriverManager/registerDriver
+     (Reflector/invokeConstructor fixed-driver-class (into-array [])))
+    (loop [remaining-tries remaining-tries]
+      (let [driver (u/ignore-exceptions
+                     (DriverManager/getDriver "jdbc:hive2://localhost:10000"))]
+        (cond
+          (instance? fixed-driver-class driver)
+          true
+
+          ;; some other driver answered for jdbc:hive2 (we do not want the one registered by the super class of our
+          ;; FixedHiveDriver): deregister it and look again, a bounded number of times
+          (and driver (pos? remaining-tries))
+          (do (DriverManager/deregisterDriver driver)
+              (recur (dec remaining-tries)))
+
+          ;; out of tries, or no driver at all was found for the URL (`driver` is nil)
+          :else
+          (log/error
+           (trs "Error: metabase.driver.FixedHiveDriver is registered, but JDBC does not seem to be using it.")))))))
+
 (defn -init-driver
-  "Register the SparkSQL driver."
+  "Register the SparkSQL driver if the SparkSQL dependencies are available."
   []
-  (driver/register-driver! :sparksql (SparkSQLDriver.)))
+  (when (u/ignore-exceptions (Class/forName "metabase.driver.FixedHiveDriver"))
+    (log/info (trs "Found metabase.driver.FixedHiveDriver."))
+    (when (u/ignore-exceptions (register-hive-jdbc-driver!))
+      (log/info (trs "Successfully registered metabase.driver.FixedHiveDriver with JDBC."))
+      (driver/register-driver! :sparksql (SparkSQLDriver.)))))
-- 
GitLab