diff --git a/bin/ci b/bin/ci index 67b431062dc3f0b0e5d387c815352321ec900c3a..91372013e50ddadea9abd69f8385f7b5fc7985c6 100755 --- a/bin/ci +++ b/bin/ci @@ -129,6 +129,10 @@ install-presto() { } install-sparksql() { + # first, download the Spark Deps JAR and put it in the plugins/ dir + wget --output-document=plugins/spark-deps.jar https://s3.amazonaws.com/sparksql-deps/metabase-sparksql-deps-1.2.1.spark2-standalone.jar + + # next, download Spark and run it spark_version='2.1.1' # Java 7 support was removed in Spark 2.2 so don't upgrade until we upgrade CI hadoop_version='2.7' diff --git a/bin/docker/run_metabase.sh b/bin/docker/run_metabase.sh index a6de5a04f7d08bab7714aa93049ee84686b5ca49..66f487756e72fd8108b9ef2b1c88f87dd21cfe55 100755 --- a/bin/docker/run_metabase.sh +++ b/bin/docker/run_metabase.sh @@ -116,7 +116,6 @@ JAVA_OPTS="${JAVA_OPTS} -Dlogfile.path=target/log" JAVA_OPTS="${JAVA_OPTS} -XX:+CMSClassUnloadingEnabled" # These two needed for Java 7 to GC dynamically generated classes JAVA_OPTS="${JAVA_OPTS} -XX:+UseConcMarkSweepGC" JAVA_OPTS="${JAVA_OPTS} -server" -JAVA_OPTS="${JAVA_OPTS} --add-opens=java.base/java.net=ALL-UNNAMED" # needed for Java 9 to allow dynamic classpath additions -- see https://github.com/tobias/dynapath JAVA_OPTS="${JAVA_OPTS} --add-modules=java.xml.bind" # needed for Java 9 (Oracle VM only) because java.xml.bind is no longer on SE classpath by default since it's EE if [ ! -z "$JAVA_TIMEZONE" ]; then diff --git a/bin/start b/bin/start index 06e70266d99caa6d2ab7b929791f3c2fd765ef31..4ea1d92aeba67717cec11c2d423ba26ef7105653 100755 --- a/bin/start +++ b/bin/start @@ -114,11 +114,10 @@ if [ -n "$HEROKU" ]; then fi # Other Java options -JAVA_OPTS="$JAVA_OPTS -XX:+IgnoreUnrecognizedVMOptions" # Don't barf if we see an option we don't understand (e.g. Java 9 option on Java 7/8) -JAVA_OPTS="$JAVA_OPTS -Djava.awt.headless=true" # don't try to start AWT. 
Not sure this does anything but better safe than wasting memory -JAVA_OPTS="$JAVA_OPTS -Dfile.encoding=UTF-8" # Use UTF-8 -JAVA_OPTS="$JAVA_OPTS --add-opens=java.base/java.net=ALL-UNNAMED" # Allow dynamically adding JARs to classpath (Java 9) -JAVA_OPTS="$JAVA_OPTS --add-modules=java.xml.bind" # Enable access to java.xml.bind module (Java 9) +JAVA_OPTS="$JAVA_OPTS -XX:+IgnoreUnrecognizedVMOptions" # Don't barf if we see an option we don't understand (e.g. Java 9 option on Java 7/8) +JAVA_OPTS="$JAVA_OPTS -Djava.awt.headless=true" # don't try to start AWT. Not sure this does anything but better safe than wasting memory +JAVA_OPTS="$JAVA_OPTS -Dfile.encoding=UTF-8" # Use UTF-8 +JAVA_OPTS="$JAVA_OPTS --add-modules=java.xml.bind" # Enable access to java.xml.bind module (Java 9) echo "Using these JAVA_OPTS: ${JAVA_OPTS}" diff --git a/bin/version b/bin/version index 9eac5b0dd8c57d5fdbed8446d7202674fe49232f..38d1d635c90a4e2cdf905b3c89affeaa58d547e0 100755 --- a/bin/version +++ b/bin/version @@ -1,6 +1,6 @@ #!/usr/bin/env bash -VERSION="v0.29.0-snapshot" +VERSION="v0.30.0-snapshot" # dynamically pull more interesting stuff from latest git commit HASH=$(git show-ref --head --hash=7 head) # first 7 letters of hash should be enough; that's what GitHub uses diff --git a/docs/administration-guide/01-managing-databases.md b/docs/administration-guide/01-managing-databases.md index fd6a932cbcdad990ed1fd2afd5c4f475ec40f87b..34a25b2035e0fd39da0aba6d9c33d0efafe0180c 100644 --- a/docs/administration-guide/01-managing-databases.md +++ b/docs/administration-guide/01-managing-databases.md @@ -24,6 +24,7 @@ Now you’ll see a list of your databases. To connect another database to Metaba * [Vertica](databases/vertica.md) * Presto * Google Analytics +* [SparkSQL](databases/spark.md) To add a database, you'll need its connection information. 
diff --git a/docs/administration-guide/databases/oracle.md b/docs/administration-guide/databases/oracle.md index 7642bae477c26104ffa6d2a6a7c083b1078a9d0a..7232641bfa648d0d64f1f71ecab6914bb73b700d 100644 --- a/docs/administration-guide/databases/oracle.md +++ b/docs/administration-guide/databases/oracle.md @@ -38,12 +38,22 @@ If you're running Metabase from the Mac App, the plugins directory defaults to ` Finally, you can choose a custom plugins directory if the default doesn't suit your needs by setting the environment variable `MB_PLUGINS_DIR`. -### Enabling Plugins on Java 9 +### Adding Additional Dependencies with Java 9 -Java 9 disables dynamically adding JARs to the Java classpath by default for security reasons. When using Java 9, you'll need to pass an extra JVM option: +Java version 9 has introduced a new module system that places some additional restrictions on class loading. To use +Metabase drivers that require extra external dependencies, you'll need to include them as part of the classpath at +launch time. Run Metabase as follows: ```bash -java --add-opens=java.base/java.net=ALL-UNNAMED -jar metabase.jar +# Unix +java -cp metabase.jar:plugins/* metabase.core ``` -The default Docker images already include this option. +On Windows, use a semicolon instead: + +```powershell +# Windows +java -cp metabase.jar;plugins/* metabase.core +``` + +The default Docker images use Java 8 so this step is only needed when running the JAR directly. diff --git a/docs/administration-guide/databases/spark.md b/docs/administration-guide/databases/spark.md new file mode 100644 index 0000000000000000000000000000000000000000..40dee1ed3787c7632c424448bf632eae788b7663 --- /dev/null +++ b/docs/administration-guide/databases/spark.md @@ -0,0 +1,74 @@ +## Working with SparkSQL in Metabase + +Starting in v0.29.0, Metabase provides a driver for connecting to SparkSQL databases. 
Under the hood, Metabase uses SparkSQL's +JDBC driver and other dependencies; due to the sheer size of this dependency, we can't include it as part of Metabase. Luckily, downloading it yourself + and making it available to Metabase is straightforward and only takes a few minutes. + +### Downloading the SparkSQL JDBC Driver JAR + +You can download the required dependencies [here](https://s3.amazonaws.com/sparksql-deps/metabase-sparksql-deps-1.2.1.spark2-standalone.jar). + +### Adding the SparkSQL JDBC Driver JAR to the Metabase Plugins Directory + +Metabase will automatically make the SparkSQL driver available if it finds the SparkSQL dependencies JAR in the Metabase plugins +directory when it starts up. All you need to do is create the directory, move the JAR you just downloaded into it, and restart +Metabase. + +By default, the plugins directory is called `plugins`, and lives in the same directory as the Metabase JAR. + +For example, if you're running Metabase from a directory called `/app/`, you should move the SparkSQL dependencies JAR to +`/app/plugins/`: + +```bash +# example directory structure for running Metabase with SparkSQL support +/app/metabase.jar +/app/plugins/metabase-sparksql-deps-1.2.1.spark2-standalone.jar +``` + +If you're running Metabase from the Mac App, the plugins directory defaults to `~/Library/Application Support/Metabase/Plugins/`: + +```bash +# example directory structure for running Metabase Mac App with SparkSQL support +/Users/camsaul/Library/Application Support/Metabase/Plugins/metabase-sparksql-deps-1.2.1.spark2-standalone.jar +``` + +Finally, you can choose a custom plugins directory if the default doesn't suit your needs by setting the environment variable +`MB_PLUGINS_DIR`. + + +### Adding Additional Dependencies with Java 9 + +Java version 9 has introduced a new module system that places some additional restrictions on class loading. 
To use +Metabase drivers that require extra external dependencies, you'll need to include them as part of the classpath at +launch time. Run Metabase as follows: + +```bash +# Unix +java -cp metabase.jar:plugins/* metabase.core +``` + +On Windows, use a semicolon instead: + +```powershell +# Windows +java -cp metabase.jar;plugins/* metabase.core +``` + +The default Docker images use Java 8 so this step is only needed when running the JAR directly. + + +### Using SparkSQL with a Custom Metabase Build + +The SparkSQL dependencies JAR contains additional classes inside the `metabase` Java package, the same package +the core Metabase code lives in. When multiple JARs include classes in the same package, Java requires them to +be signed with the same signing certificate. The official Metabase JAR and SparkSQL dependencies JAR are signed +with the same certificate, so everything works as expected. + +If you build a custom Metabase JAR, however, Java will refuse to load the SparkSQL dependencies JAR provided +above, because your JAR will not be signed with the same certificate (if you signed it at all). You will need to +build the SparkSQL dependencies JAR yourself, and, if applicable, sign it with the same certificate you signed +your custom Metabase JAR with. + +The SparkSQL dependencies project can be found at +[https://github.com/metabase/sparksql-deps](https://github.com/metabase/sparksql-deps). Instructions for building +the JAR are provided in the README. 
diff --git a/docs/administration-guide/databases/vertica.md b/docs/administration-guide/databases/vertica.md index 1664d8e88bede81abeb6b150259305b6012a950f..4448051aa0d58d87d8d8864c4447b13b58657575 100644 --- a/docs/administration-guide/databases/vertica.md +++ b/docs/administration-guide/databases/vertica.md @@ -39,12 +39,22 @@ If you're running Metabase from the Mac App, the plugins directory defaults to ` If you are running the Docker image or you want to use another directory for plugins, you should then specify a custom plugins directory by setting the environment variable `MB_PLUGINS_DIR`. -### Enabling Plugins on Java 9 +### Adding Additional Dependencies with Java 9 -Java 9 disables dynamically adding JARs to the Java classpath by default for security reasons. When using Java 9, you'll need to pass an extra JVM option: +Java version 9 has introduced a new module system that places some additional restrictions on class loading. To use +Metabase drivers that require extra external dependencies, you'll need to include them as part of the classpath at +launch time. Run Metabase as follows: ```bash -java --add-opens=java.base/java.net=ALL-UNNAMED -jar metabase.jar +# Unix +java -cp metabase.jar:plugins/* metabase.core ``` -The default Docker images already include this option. +On Windows, use a semicolon instead: + +```powershell +# Windows +java -cp metabase.jar;plugins/* metabase.core +``` + +The default Docker images use Java 8 so this step is only needed when running the JAR directly. 
diff --git a/docs/operations-guide/running-metabase-on-heroku.md b/docs/operations-guide/running-metabase-on-heroku.md index 4ef364c065b57c15af571904e7feae51328a7030..7895a2926f22f3595444fd1f319ad2bd62d9b2c9 100644 --- a/docs/operations-guide/running-metabase-on-heroku.md +++ b/docs/operations-guide/running-metabase-on-heroku.md @@ -11,7 +11,9 @@ If you've got a Heroku account then all there is to do is follow this one-click [](http://downloads.metabase.com/launch-heroku.html) -This will launch a Heroku deployment using a github repository that Metabase maintains. +This will launch a Heroku deployment using a GitHub repository that Metabase maintains. + +It should only take a few minutes for Metabase to start. You can check on the progress by viewing the logs at [https://dashboard.heroku.com/apps/YOUR_APPLICATION_NAME/logs](https://dashboard.heroku.com/apps/YOUR_APPLICATION_NAME/logs) or using the Heroku command line tool with the `heroku logs -t -a YOUR_APPLICATION_NAME` command. ### Upgrading beyond the `Free` tier @@ -27,6 +29,7 @@ Heroku is very kind and offers a free tier to be used for very small/non-critica * Heroku’s 30 second timeouts on all web requests can cause a few issues if you happen to have longer running database queries. Most people don’t run into this but be aware that it’s possible. * When using the `free` tier, if you don’t access the application for a while Heroku will sleep your Metabase environment. This prevents things like Pulses and Metabase background tasks from running when scheduled and at times makes the app appear to be slow when really it's just Heroku reloading your app. You can resolve this by upgrading to the `hobby` tier or higher. + * Sometimes Metabase may run out of memory and you will see messages like `Error R14 (Memory quota exceeded)` in the Heroku logs. If this happens regularly we recommend upgrading to the `standard-2x` tier dyno. 
Now that you’ve installed Metabase, it’s time to [set it up and connect it to your database](../setting-up-metabase.md). diff --git a/docs/operations-guide/start.md b/docs/operations-guide/start.md index 276099552c53326dfb897f2a466bfbeb42ca84d2..0f82f5c3236a81ce4e1c93535c6886744e01af1f 100644 --- a/docs/operations-guide/start.md +++ b/docs/operations-guide/start.md @@ -353,6 +353,23 @@ Running on Java 8 is the easiest path to running Metabase. There are no addition ## Running on Java 9 -Java version 9 has introduced a new module system that places some additional restrictions on class loading. Metabase (and it's dependencies) still rely on the old behavior. Metabase runs on Java 9, but requires an additional argument to work around these changes in the module system: +To use Metabase on Java 9 with Oracle, Vertica, SparkSQL, or other drivers that require external dependencies, +you'll need to tweak the way you launch Metabase. - java --add-opens=java.base/java.net=ALL-UNNAMED -jar metabase.jar +Java version 9 has introduced a new module system that places some additional restrictions on class loading. To use +Metabase drivers that require extra external dependencies, you'll need to include them as part of the classpath at +launch time. Run Metabase as follows: + +```bash +# Unix +java -cp metabase.jar:plugins/* metabase.core +``` + +On Windows, use a semicolon instead: + +```powershell +# Windows +java -cp metabase.jar;plugins/* metabase.core +``` + +The default Docker images use Java 8 so this step is only needed when running the JAR directly. 
diff --git a/docs/users-guide/14-x-rays.md b/docs/users-guide/14-x-rays.md index ac114968e85a9024a2902b5af9168c133bfb829a..09946cf01a2d6df3a1876b3c35a4f3b0dfbff5bd 100644 --- a/docs/users-guide/14-x-rays.md +++ b/docs/users-guide/14-x-rays.md @@ -6,11 +6,11 @@ X-rays are a fast and easy way to get automatic insights and explorations of you When you first connect a database to Metabase, Metabot will offer to show you some automated explorations of your data. - + Click on one of these to see an x-ray. - + You can see more suggested x-rays over on the right-hand side of the screen. Browsing through x-rays like this is a pretty fun way of getting a quick overview of your data. @@ -24,13 +24,13 @@ To quickly make your new dashboard visible to other users, go to the collection One great way to explore your data in general in Metabase is to click on points of interest in charts or tables, which shows you ways to further explore that point. We've added x-rays to this action menu, so if you for example find a point on your line chart that seems extra interesting, give it a click and x-ray it! We think you'll like what you see. - + ### X-rays in the Data Reference You can also create an x-ray by navigating to a table, field, metric, or segment in the [Data Reference](./12-data-model-reference.md). Just click the x-ray link in the left sidebar. - + ### Where did the old x-rays go? 
diff --git a/frontend/src/metabase/admin/people/containers/PeopleListingApp.jsx b/frontend/src/metabase/admin/people/containers/PeopleListingApp.jsx index a022aaca4e46d52e327ce2d86117c390489806ab..a4f7123a1b55490a319f2f63052a81de11050412 100644 --- a/frontend/src/metabase/admin/people/containers/PeopleListingApp.jsx +++ b/frontend/src/metabase/admin/people/containers/PeopleListingApp.jsx @@ -2,7 +2,6 @@ import React, { Component } from "react"; import PropTypes from "prop-types"; import { Link } from "react-router"; -import _ from "underscore"; import { connect } from "react-redux"; import LoadingAndErrorWrapper from "metabase/components/LoadingAndErrorWrapper.jsx"; @@ -30,7 +29,7 @@ export const MODAL_RESET_PASSWORD_EMAIL = "MODAL_RESET_PASSWORD_EMAIL"; export const MODAL_USER_ADDED_WITH_INVITE = "MODAL_USER_ADDED_WITH_INVITE"; export const MODAL_USER_ADDED_WITH_PASSWORD = "MODAL_USER_ADDED_WITH_PASSWORD"; -import { getUsers, getModal, getGroups } from "../selectors"; +import { getSortedUsers, getModal, getGroups } from "../selectors"; import { createUser, deleteUser, @@ -48,7 +47,7 @@ import { const mapStateToProps = (state, props) => { return { - users: getUsers(state, props), + users: getSortedUsers(state, props), modal: getModal(state, props), user: state.currentUser, groups: getGroups(state, props), @@ -402,8 +401,6 @@ export default class PeopleListingApp extends Component { let { modal, users, groups } = this.props; let { error } = this.state; - users = _.values(users).sort((a, b) => b.date_joined - a.date_joined); - return ( <LoadingAndErrorWrapper loading={!users} error={error}> {() => ( diff --git a/frontend/src/metabase/admin/people/selectors.js b/frontend/src/metabase/admin/people/selectors.js index 358d1aad1698b253aa0501ff8810c7ed09756248..577178f90162f4f09118909c5d42882b207f6038 100644 --- a/frontend/src/metabase/admin/people/selectors.js +++ b/frontend/src/metabase/admin/people/selectors.js @@ -23,3 +23,19 @@ export const getUsers = createSelector( 
.value(), })), ); + +// sort the users list by last_name, ignore case or diacritical marks. If last names are the same then compare by first +// name +const compareNames = (a, b) => + a.localeCompare(b, undefined, { sensitivity: "base" }); + +export const getSortedUsers = createSelector( + [getUsers], + users => + users && + _.values(users).sort( + (a, b) => + compareNames(a.last_name, b.last_name) || + compareNames(a.first_name, b.first_name), + ), +); diff --git a/project.clj b/project.clj index c86d62e82ec1f63d633b751db5adac0cfbfc197d..d9a5a61ff8c87a547a50f25ebeff0d25d93ab6f5 100644 --- a/project.clj +++ b/project.clj @@ -10,8 +10,7 @@ "test" ["with-profile" "+expectations" "expectations"] "generate-sample-dataset" ["with-profile" "+generate-sample-dataset" "run"] "profile" ["with-profile" "+profile" "run" "profile"] - "h2" ["with-profile" "+h2-shell" "run" "-url" "jdbc:h2:./metabase.db" "-user" "" "-password" "" "-driver" "org.h2.Driver"] - "validate-automagic-dashboards" ["with-profile" "+validate-automagic-dashboards" "run"]} + "h2" ["with-profile" "+h2-shell" "run" "-url" "jdbc:h2:./metabase.db" "-user" "" "-password" "" "-driver" "org.h2.Driver"]} :dependencies [[org.clojure/clojure "1.8.0"] [org.clojure/core.async "0.3.442"] [org.clojure/core.match "0.3.0-alpha4"] ; optimized pattern matching library for Clojure @@ -93,16 +92,6 @@ [org.liquibase/liquibase-core "3.5.3"] ; migration management (Java lib) [org.postgresql/postgresql "42.1.4.jre7"] ; Postgres driver [org.slf4j/slf4j-log4j12 "1.7.25"] ; abstraction for logging frameworks -- allows end user to plug in desired logging framework at deployment time - [org.spark-project.hive/hive-jdbc "1.2.1.spark2" ; JDBC Driver for Apache Spark :exclusions [org.apache.curator/curator-framework org.apache.curator/curator-recipes org.apache.thrift/libfb303 org.apache.zookeeper/zookeeper org.eclipse.jetty.aggregate/jetty-all org.spark-project.hive/hive-common org.spark-project.hive/hive-metastore - 
org.spark-project.hive/hive-serde - org.spark-project.hive/hive-shims]] [org.tcrawley/dynapath "0.2.5"] ; Dynamically add Jars (e.g. Oracle or Vertica) to classpath [org.xerial/sqlite-jdbc "3.21.0.1"] ; SQLite driver [org.yaml/snakeyaml "1.18"] ; YAML parser (required by liquibase) @@ -129,7 +118,6 @@ "-Xverify:none" ; disable bytecode verification when running in dev so it starts slightly faster "-XX:+CMSClassUnloadingEnabled" ; let Clojure's dynamically generated temporary classes be GC'ed from PermGen "-XX:+UseConcMarkSweepGC" ; Concurrent Mark Sweep GC needs to be used for Class Unloading (above) - "--add-opens=java.base/java.net=ALL-UNNAMED" ; let Java 9 dynamically add to classpath -- see https://github.com/tobias/dynapath#note-on-java-9 "--add-modules=java.xml.bind" ; tell Java 9 (Oracle VM only) to add java.xml.bind to classpath. No longer on it by default. See https://stackoverflow.com/questions/43574426/how-to-resolve-java-lang-noclassdeffounderror-javax-xml-bind-jaxbexception-in-j "-Djava.awt.headless=true"] ; prevent Java icon from randomly popping up in dock when running `lein ring server` :javac-options ["-target" "1.7", "-source" "1.7"] @@ -165,10 +153,7 @@ :env {:mb-run-mode "dev"} :jvm-opts ["-Dlogfile.path=target/log"] ;; Log appender class needs to be compiled for log4j to use it, - ;; classes for fixed Hive driver in must be compiled for tests - :aot [metabase.logger - metabase.driver.FixedHiveConnection - metabase.driver.FixedHiveDriver]} + :aot [metabase.logger]} :ci {:jvm-opts ["-Xmx3g"]} :reflection-warnings {:global-vars {*warn-on-reflection* true}} ; run `lein check-reflection-warnings` to check for reflection warnings :expectations {:injections [(require 'metabase.test-setup ; for test setup stuff @@ -195,5 +180,4 @@ :profile {:jvm-opts ["-XX:+CITime" ; print time spent in JIT compiler "-XX:+PrintGC"]} ; print a message when garbage collection takes place ;; get the H2 shell with 'lein h2' - :h2-shell {:main org.h2.tools.Shell} - 
:validate-automagic-dashboards {:main metabase.automagic-dashboards.rules}}) + :h2-shell {:main org.h2.tools.Shell}}) diff --git a/src/metabase/api/automagic_dashboards.clj b/src/metabase/api/automagic_dashboards.clj index acc12f9786b85fb87c17b898082285177bb55520..d46dce5a12d75d3ca3642d22d89ba9bdd59e5f66 100644 --- a/src/metabase/api/automagic_dashboards.clj +++ b/src/metabase/api/automagic_dashboards.clj @@ -30,20 +30,20 @@ (def ^:private Prefix (su/with-api-error-message - (->> ["table" "metric" "field"] - (mapcat rules/load-rules) - (filter :indepth) - (map :rule) - (apply s/enum)) + (s/pred (fn [prefix] + (some #(not-empty (rules/get-rules [% prefix])) ["table" "metric" "field"]))) (tru "invalid value for prefix"))) (def ^:private Rule (su/with-api-error-message - (->> ["table" "metric" "field"] - (mapcat rules/load-rules) - (mapcat :indepth) - (map :rule) - (apply s/enum)) + (s/pred (fn [rule] + (some (fn [toplevel] + (some (comp rules/get-rule + (fn [prefix] + [toplevel prefix rule]) + :rule) + (rules/get-rules [toplevel]))) + ["table" "metric" "field"]))) (tru "invalid value for rule name"))) (def ^:private ^{:arglists '([s])} decode-base64-json @@ -51,13 +51,9 @@ (def ^:private Base64EncodedJSON (su/with-api-error-message - (s/pred decode-base64-json "valid base64 encoded json") + (s/pred decode-base64-json) (tru "value couldn''t be parsed as base64 encoded JSON"))) -(defn- load-rule - [entity prefix rule] - (rules/load-rule (format "%s/%s/%s.yaml" entity prefix rule))) - (api/defendpoint GET "/database/:id/candidates" "Return a list of candidates for automagic dashboards orderd by interestingness." 
[id] @@ -84,7 +80,7 @@ Table api/check-404 (magic/automagic-analysis - {:rule (load-rule "table" prefix rule) + {:rule ["table" prefix rule] :show (keyword show)}))) (api/defendpoint GET "/segment/:id" @@ -103,7 +99,7 @@ Segment api/check-404 (magic/automagic-analysis - {:rule (load-rule "table" prefix rule) + {:rule ["table" prefix rule] :show (keyword show)}))) (api/defendpoint GET "/question/:id/cell/:cell-query" @@ -130,7 +126,7 @@ Card api/check-404 (magic/automagic-analysis {:show (keyword show) - :rule (load-rule "table" prefix rule) + :rule ["table" prefix rule] :cell-query (decode-base64-json cell-query)}))) (api/defendpoint GET "/metric/:id" @@ -158,7 +154,7 @@ prefix Prefix rule Rule} (-> id Card api/check-404 (magic/automagic-analysis {:show (keyword show) - :rule (load-rule "table" prefix rule)}))) + :rule ["table" prefix rule]}))) (api/defendpoint GET "/adhoc/:query" "Return an automagic dashboard analyzing ad hoc query." @@ -181,7 +177,7 @@ decode-base64-json query/adhoc-query (magic/automagic-analysis {:show (keyword show) - :rule (load-rule "table" prefix rule)}))) + :rule ["table" prefix rule]}))) (api/defendpoint GET "/adhoc/:query/cell/:cell-query" "Return an automagic dashboard analyzing ad hoc query." @@ -211,7 +207,7 @@ query/adhoc-query (magic/automagic-analysis {:show (keyword show) :cell-query cell-query - :rule (load-rule "table" prefix rule)})))) + :rule ["table" prefix rule]})))) (def ^:private valid-comparison-pair? 
#{["segment" "segment"] diff --git a/src/metabase/api/common.clj b/src/metabase/api/common.clj index 0371d49afc174d1eb10fa4c345d2e29a21782ca3..9162a54c4a36d647ea92e8ff2ce27f1ffa439914 100644 --- a/src/metabase/api/common.clj +++ b/src/metabase/api/common.clj @@ -14,15 +14,11 @@ [metabase.api.common.internal :refer :all] [metabase.models.interface :as mi] [puppetlabs.i18n.core :refer [trs tru]] - [ring.util - [io :as rui] - [response :as rr]] [ring.core.protocols :as protocols] [ring.util.response :as response] [schema.core :as s] [toucan.db :as db]) - (:import [java.io BufferedWriter OutputStream OutputStreamWriter] - [java.nio.charset Charset StandardCharsets])) + (:import java.io.OutputStream)) (declare check-403 check-404) diff --git a/src/metabase/api/field.clj b/src/metabase/api/field.clj index 8a5d17741cc7fe0a31bc50b16764bb596c29442b..a1ea3eec15b4ba298387c1acaa1c1e597624608d 100644 --- a/src/metabase/api/field.clj +++ b/src/metabase/api/field.clj @@ -75,7 +75,7 @@ points_of_interest (s/maybe su/NonBlankString) special_type (s/maybe FieldType) visibility_type (s/maybe FieldVisibilityType) - has_field_values (s/maybe (s/enum "search" "list" "none"))} + has_field_values (s/maybe (apply s/enum (map name field/has-field-values-options)))} (let [field (hydrate (api/write-check Field id) :dimensions) new-special-type (keyword (get body :special_type (:special_type field))) removed-fk? (removed-fk-special-type? 
(:special_type field) new-special-type) diff --git a/src/metabase/api/routes.clj b/src/metabase/api/routes.clj index 9ca6732deee0806b08964766a234ee3b976c3f53..75367a717f37ca99a91ac80dd9280e5d190b7d8c 100644 --- a/src/metabase/api/routes.clj +++ b/src/metabase/api/routes.clj @@ -36,7 +36,8 @@ [user :as user] [util :as util] [x-ray :as x-ray]] - [metabase.middleware :as middleware])) + [metabase.middleware :as middleware] + [puppetlabs.i18n.core :refer [tru]])) (def ^:private +generic-exceptions "Wrap ROUTES so any Exception thrown is just returned as a generic 400, to prevent details from leaking in public @@ -90,6 +91,4 @@ (context "/tiles" [] (+auth tiles/routes)) (context "/user" [] (+auth user/routes)) (context "/util" [] util/routes) - (route/not-found (fn [{:keys [request-method uri]}] - {:status 404 - :body (str (.toUpperCase (name request-method)) " " uri " does not exist.")}))) + (route/not-found (constantly {:status 404, :body (tru "API endpoint does not exist.")}))) diff --git a/src/metabase/api/user.clj b/src/metabase/api/user.clj index fdb7a6b7191c02fdf13da5a6bcf85fa60549c9c2..342e59eb7a586eb2c79620fc78daf83c00653b92 100644 --- a/src/metabase/api/user.clj +++ b/src/metabase/api/user.clj @@ -23,7 +23,9 @@ "Fetch a list of all active `Users` for the admin People page." [] (db/select [User :id :first_name :last_name :email :is_superuser :google_auth :ldap_auth :last_login] - :is_active true)) + :is_active true + {:order-by [[:%lower.last_name :asc] + [:%lower.first_name :asc]]})) (defn- reactivate-user! 
[existing-user first-name last-name] (when-not (:is_active existing-user) diff --git a/src/metabase/automagic_dashboards/core.clj b/src/metabase/automagic_dashboards/core.clj index b479ead6b762baede773c8d2c4d80984fdebcc3d..d491a4fccf7ecc3adb3a40e9506fe34ddf121dff 100644 --- a/src/metabase/automagic_dashboards/core.clj +++ b/src/metabase/automagic_dashboards/core.clj @@ -30,8 +30,9 @@ [metabase.related :as related] [metabase.sync.analyze.classify :as classify] [metabase.util :as u] - [puppetlabs.i18n.core :as i18n :refer [tru]] + [puppetlabs.i18n.core :as i18n :refer [tru trs]] [ring.util.codec :as codec] + [schema.core :as s] [toucan.db :as db])) (def ^:private public-endpoint "/auto/dashboard/") @@ -50,7 +51,7 @@ :source table :database (:db_id table) :url (format "%stable/%s" public-endpoint (u/get-id table)) - :rules-prefix "table"}) + :rules-prefix ["table"]}) (defmethod ->root (type Segment) [segment] @@ -61,7 +62,7 @@ :database (:db_id table) :query-filter (-> segment :definition :filter) :url (format "%ssegment/%s" public-endpoint (u/get-id segment)) - :rules-prefix "table"})) + :rules-prefix ["table"]})) (defmethod ->root (type Metric) [metric] @@ -71,7 +72,7 @@ :source table :database (:db_id table) :url (format "%smetric/%s" public-endpoint (u/get-id metric)) - :rules-prefix "metric"})) + :rules-prefix ["metric"]})) (defmethod ->root (type Field) [field] @@ -81,7 +82,7 @@ :source table :database (:db_id table) :url (format "%sfield/%s" public-endpoint (u/get-id field)) - :rules-prefix "field"})) + :rules-prefix ["field"]})) (defmulti ^{:doc "Get a reference for a given model to be injected into a template @@ -420,15 +421,16 @@ (assoc :score score :dataset_query query)))))))) -(def ^:private ^{:arglists '([rule])} rule-specificity - (comp (partial transduce (map (comp count ancestors)) +) :applies_to)) +(s/defn ^:private rule-specificity + [rule :- rules/Rule] + (transduce (map (comp count ancestors)) + (:applies_to rule))) -(defn- matching-rules +(s/defn 
^:private matching-rules "Return matching rules orderd by specificity. Most specific is defined as entity type specification the longest ancestor chain." - [rules {:keys [source entity]}] - (let [table-type (:entity_type source)] + [rules :- [rules/Rule], {:keys [source entity]}] + (let [table-type (or (:entity_type source) :entity/GenericTable)] (->> rules (filter (fn [{:keys [applies_to]}] (let [[entity-type field-type] applies_to] @@ -479,8 +481,8 @@ [context _] context) -(defn- make-context - [root rule] +(s/defn ^:private make-context + [root, rule :- rules/Rule] {:pre [(:source root)]} (let [source (:source root) tables (concat [source] (when (instance? (type Table) source) @@ -524,10 +526,10 @@ vals (apply concat))) -(defn- make-dashboard - ([root rule] +(s/defn ^:private make-dashboard + ([root, rule :- rules/Rule] (make-dashboard root rule {:tables [(:source root)]})) - ([root rule context] + ([root, rule :- rules/Rule, context] (let [this {"this" (-> root :entity (assoc :full-name (:full-name root)))}] @@ -540,8 +542,8 @@ (update :groups (partial fill-templates :string context {})) (assoc :refinements (:cell-query root)))))) -(defn- apply-rule - [root rule] +(s/defn ^:private apply-rule + [root, rule :- rules/Rule] (let [context (make-context root rule) dashboard (make-dashboard root rule context) filters (->> rule @@ -568,11 +570,11 @@ [entity] (let [root (->root entity) rule (->> root - (matching-rules (rules/load-rules (:rules-prefix root))) + (matching-rules (rules/get-rules (:rules-prefix root))) first) dashboard (make-dashboard root rule)] {:url (:url root) - :title (-> root :full-name str/capitalize) + :title (:full-name root) :description (:description dashboard)})) (defn- others @@ -587,10 +589,9 @@ (take n) (map ->related-entity))))) -(defn- indepth - [root rule] - (->> rule - :indepth +(s/defn ^:private indepth + [root, rule :- rules/Rule] + (->> (rules/get-rules (concat (:rules-prefix root) [(:rule rule)])) (keep (fn [indepth] (when-let 
[[dashboard _] (apply-rule root indepth)] {:title ((some-fn :short-title :title) dashboard) @@ -599,8 +600,8 @@ (:rule indepth))}))) (take max-related))) -(defn- related - [root rule] +(s/defn ^:private related + [root, rule :- rules/Rule] (let [indepth (indepth root rule)] {:indepth indepth :tables (take (- max-related (count indepth)) (others root))})) @@ -608,40 +609,45 @@ (defn- automagic-dashboard "Create dashboards for table `root` using the best matching heuristics." [{:keys [rule show rules-prefix query-filter cell-query full-name] :as root}] - (when-let [[dashboard rule] (if rule - (apply-rule root rule) - (->> root - (matching-rules (rules/load-rules rules-prefix)) - (keep (partial apply-rule root)) - ;; `matching-rules` returns an `ArraySeq` (via `sort-by`) so - ;; `first` realises one element at a time (no chunking). - first))] - (log/info (format "Applying heuristic %s to %s." (:rule rule) full-name)) - (log/info (format "Dimensions bindings:\n%s" - (->> dashboard - :context - :dimensions - (m/map-vals #(update % :matches (partial map :name))) - u/pprint-to-str))) - (log/info (format "Using definitions:\nMetrics:\n%s\nFilters:\n%s" - (-> dashboard :context :metrics u/pprint-to-str) - (-> dashboard :context :filters u/pprint-to-str))) - (-> (cond-> dashboard - (or query-filter cell-query) - (assoc :title (str (tru "A closer look at ") full-name))) - (populate/create-dashboard (or show max-cards)) - (assoc :related (-> (related root rule) - (assoc :more (if (and (-> dashboard - :cards - count - (> max-cards)) - (not= show :all)) - [{:title (tru "Show more about this") - :description nil - :table (:source root) - :url (format "%s#show=all" - (:url root))}] - []))))))) + (if-let [[dashboard rule] (if rule + (apply-rule root (rules/get-rule rule)) + (->> root + (matching-rules (rules/get-rules rules-prefix)) + (keep (partial apply-rule root)) + ;; `matching-rules` returns an `ArraySeq` (via `sort-by`) so + ;; `first` realises one element at a time (no 
chunking). + first))] + (do + (log/infof (trs "Applying heuristic %s to %s.") (:rule rule) full-name) + (log/infof (trs "Dimensions bindings:\n%s") + (->> dashboard + :context + :dimensions + (m/map-vals #(update % :matches (partial map :name))) + u/pprint-to-str)) + (log/infof (trs "Using definitions:\nMetrics:\n%s\nFilters:\n%s") + (-> dashboard :context :metrics u/pprint-to-str) + (-> dashboard :context :filters u/pprint-to-str)) + (-> (cond-> dashboard + (or query-filter cell-query) + (assoc :title (str (tru "A closer look at ") full-name))) + (populate/create-dashboard (or show max-cards)) + (assoc :related (-> (related root rule) + (assoc :more (if (and (-> dashboard + :cards + count + (> max-cards)) + (not= show :all)) + [{:title (tru "Show more about this") + :description nil + :table (:source root) + :url (format "%s#show=all" + (:url root))}] + [])))))) + (throw (ex-info (format (trs "Can't create dashboard for %s") full-name) + {:root root + :available-rules (map :rule (or (some-> rule rules/get-rule vector) + (rules/get-rules rules-prefix)))})))) (def ^:private ^{:arglists '([card])} table-like? (comp empty? #(qp.util/get-in-normalized % [:dataset_query :query :aggregation]))) @@ -699,7 +705,7 @@ (u/get-id card) (encode-base64-json cell-query)) (format "%squestion/%s" public-endpoint (u/get-id card))) - :rules-prefix "table"} + :rules-prefix ["table"]} opts))) nil)) @@ -732,7 +738,7 @@ (encode-base64-json cell-query)) (format "%sadhoc/%s" public-endpoint (encode-base64-json query))) - :rules-prefix "table"} + :rules-prefix ["table"]} (update opts :cell-query merge-filter-clauses (qp.util/get-in-normalized query [:dataset_query :query :filter]))))) nil)) @@ -765,12 +771,10 @@ interestingness of tables they contain (see above)." 
([database] (candidate-tables database nil)) ([database schema] - (let [rules (rules/load-rules "table")] + (let [rules (rules/get-rules ["table"])] (->> (apply db/select Table (cond-> [:db_id (u/get-id database) - :visibility_type nil - :entity_type [:not= nil]] ; only consider tables that have alredy - ; been analyzed + :visibility_type nil] schema (concat [:schema schema]))) (filter mi/can-read?) (map enhanced-table-stats) diff --git a/src/metabase/automagic_dashboards/populate.clj b/src/metabase/automagic_dashboards/populate.clj index 82ce00dedfb676b0d93ed72a08debf625510e5db..9b4dacdc38b938e5a2a616e4db5e1c5c442a62c9 100644 --- a/src/metabase/automagic_dashboards/populate.clj +++ b/src/metabase/automagic_dashboards/populate.clj @@ -6,6 +6,7 @@ [metabase.automagic-dashboards.filters :as magic.filters] [metabase.models.card :as card] [metabase.query-processor.util :as qp.util] + [puppetlabs.i18n.core :as i18n :refer [trs]] [toucan.db :as db])) (def ^Long ^:const grid-width @@ -252,10 +253,10 @@ ;; Height doesn't need to be precise, just some ;; safe upper bound. 
(make-grid grid-width (* n grid-width))]))] - (log/info (format "Adding %s cards to dashboard %s:\n%s" - (count cards) - title - (str/join "; " (map :title cards)))) + (log/infof (trs "Adding %s cards to dashboard %s:\n%s") + (count cards) + title + (str/join "; " (map :title cards))) (cond-> dashboard (not-empty filters) (magic.filters/add-filters filters max-filters))))) diff --git a/src/metabase/automagic_dashboards/rules.clj b/src/metabase/automagic_dashboards/rules.clj index fb94ba1fe1093f2c0952be39386440a53325ab2a..ee4c7fe93f32d2b6af323254b7b81ed66ef3f607 100644 --- a/src/metabase/automagic_dashboards/rules.clj +++ b/src/metabase/automagic_dashboards/rules.clj @@ -7,19 +7,20 @@ [metabase.types] [metabase.util :as u] [metabase.util.schema :as su] + [puppetlabs.i18n.core :as i18n :refer [trs]] [schema [coerce :as sc] [core :as s]] [yaml.core :as yaml]) (:import java.nio.file.Path java.nio.file.FileSystems java.nio.file.FileSystem - java.nio.file.Files )) + java.nio.file.Files)) (def ^Long ^:const max-score "Maximal (and default) value for heuristics scores." 100) (def ^:private Score (s/constrained s/Int #(<= 0 % max-score) - (str "0 <= score <= " max-score))) + (format (trs "0 <= score <= %s") max-score))) (def ^:private MBQL [s/Any]) @@ -80,7 +81,7 @@ (def ^:private Visualization [(s/one s/Str "visualization") su/Map]) (def ^:private Width (s/constrained s/Int #(<= 1 % populate/grid-width) - (format "1 <= width <= %s" + (format (trs "1 <= width <= %s") populate/grid-width))) (def ^:private Height (s/constrained s/Int pos?)) @@ -186,7 +187,8 @@ schema (partition 2 constraints))) -(def ^:private Rules +(def Rule + "Rules defining an automagic dashboard." (constrained-all {(s/required-key :title) s/Str (s/required-key :dimensions) [Dimension] @@ -201,13 +203,13 @@ (s/optional-key :groups) Groups (s/optional-key :indepth) [s/Any] (s/optional-key :dashboard_filters) [s/Str]} - valid-metrics-references? "Valid metrics references" - valid-filters-references? 
"Valid filters references" - valid-group-references? "Valid group references" - valid-order-by-references? "Valid order_by references" - valid-dashboard-filters-references? "Valid dashboard filters references" - valid-dimension-references? "Valid dimension references" - valid-breakout-dimension-references? "Valid card dimension references")) + valid-metrics-references? (trs "Valid metrics references") + valid-filters-references? (trs "Valid filters references") + valid-group-references? (trs "Valid group references") + valid-order-by-references? (trs "Valid order_by references") + valid-dashboard-filters-references? (trs "Valid dashboard filters references") + valid-dimension-references? (trs "Valid dimension references") + valid-breakout-dimension-references? (trs "Valid card dimension references"))) (defn- with-defaults [defaults] @@ -234,7 +236,7 @@ (def ^:private rules-validator (sc/coercer! - Rules + Rule {[s/Str] ensure-seq [OrderByPair] ensure-seq OrderByPair (fn [x] @@ -276,79 +278,79 @@ (def ^:private rules-dir "automagic_dashboards/") -(def ^:private ^{:arglists '([f])} file->table-type - (comp (partial re-find #".+(?=\.yaml)") str (memfn ^Path getFileName))) - -(def ^:private ^{:arglists '([f])} file->parent-dir - (comp last #(str/split % #"/") str (memfn ^Path getParent))) - -(defmacro ^:private with-resources - [identifier & body] - `(let [uri# (-> rules-dir io/resource .toURI)] - (let [[fs# path#] (-> uri# .toString (str/split #"!" 2))] - (if path# - (with-open [^FileSystem ~identifier - (-> fs# - java.net.URI/create - (FileSystems/newFileSystem (java.util.HashMap.)))] - ~@body) - (let [~identifier (FileSystems/getDefault)] - ~@body))))) - -(defn- resource-path - [^FileSystem fs path] - (when-let [path (some->> path (str rules-dir) io/resource)] - (let [path (if (-> path str (str/starts-with? "jar")) - (-> path str (str/split #"!" 
2) second) - (.getPath path))] - (.getPath fs path (into-array String []))))) - -(declare load-rules) - -(defn load-rule - "Load and validate rule from file `f`." - ([f] - (with-resources fs - (some->> f (resource-path fs) (load-rule fs)))) - ([fs ^Path f] - (try - (-> f - .toUri - slurp - yaml/parse-string - (assoc :rule (file->table-type f)) - (update :applies_to #(or % (file->table-type f))) - rules-validator - (assoc :indepth (load-rules fs (format "%s/%s" - (file->parent-dir f) - (file->table-type f))))) - (catch Exception e - (log/error (format "Error parsing %s:\n%s" - (.getFileName f) - (or (some-> e - ex-data - (select-keys [:error :value]) - u/pprint-to-str) - e))) - nil)))) - -(defn load-rules - "Load and validate all rules in dir." - ([dir] - (with-resources fs - (load-rules fs dir))) - ([fs dir] - (when-let [dir (resource-path fs dir)] - (with-open [ds (Files/newDirectoryStream dir)] - (->> ds - (filter #(str/ends-with? (.toString ^Path %) ".yaml")) - (keep (partial load-rule fs)) - doall))))) - -(defn -main - "Entry point for lein task `validate-automagic-dashboards`" - [& _] - (dorun (load-rules "tables")) - (dorun (load-rules "metrics")) - (dorun (load-rules "fields")) - (System/exit 0)) +(def ^:private ^{:arglists '([f])} file->entity-type + (comp (partial re-find #".+(?=\.yaml$)") str (memfn ^Path getFileName))) + +(defn- load-rule + [^Path f] + (try + (let [entity-type (file->entity-type f)] + (-> f + .toUri + slurp + yaml/parse-string + (assoc :rule entity-type) + (update :applies_to #(or % entity-type)) + rules-validator)) + (catch Exception e + (log/errorf (trs "Error parsing %s:\n%s") + (.getFileName f) + (or (some-> e + ex-data + (select-keys [:error :value]) + u/pprint-to-str) + e)) + nil))) + +(defn- trim-trailing-slash + [s] + (if (str/ends-with? 
s "/") + (subs s 0 (-> s count dec)) + s)) + +(defn- load-rule-dir + ([dir] (load-rule-dir dir [] {})) + ([dir path rules] + (with-open [ds (Files/newDirectoryStream dir)] + (reduce (fn [rules ^Path f] + (cond + (Files/isDirectory f (into-array java.nio.file.LinkOption [])) + (load-rule-dir f (->> f (.getFileName) str trim-trailing-slash (conj path)) rules) + + (file->entity-type f) + (assoc-in rules (concat path [(file->entity-type f) ::leaf]) (load-rule f)) + + :else + rules)) + rules + ds)))) + +(defmacro ^:private with-resource + [[identifier path] & body] + `(let [[jar# path#] (-> ~path .toString (str/split #"!" 2))] + (if path# + (with-open [^FileSystem fs# (-> jar# + java.net.URI/create + (FileSystems/newFileSystem (java.util.HashMap.)))] + (let [~identifier (.getPath fs# path# (into-array String []))] + ~@body)) + (let [~identifier (.getPath (FileSystems/getDefault) (.getPath ~path) (into-array String []))] + ~@body)))) + +(def ^:private rules (delay + (with-resource [path (-> rules-dir io/resource .toURI)] + (into {} (load-rule-dir path))))) + +(defn get-rules + "Get all rules with prefix `prefix`. + prefix is greedy, so [\"table\"] will match table/TransactionTable.yaml, but not + table/TransactionTable/ByCountry.yaml" + [prefix] + (->> prefix + (get-in @rules) + (keep (comp ::leaf val)))) + +(defn get-rule + "Get rule at path `path`." 
+ [path] + (get-in @rules (concat path [::leaf]))) diff --git a/src/metabase/db/metadata_queries.clj b/src/metabase/db/metadata_queries.clj index caae266a05e7b0e6b30295ec7c1d21dd1e25143a..7dc23248e6e32ab127ba3eec332ef2284a06f591 100644 --- a/src/metabase/db/metadata_queries.clj +++ b/src/metabase/db/metadata_queries.clj @@ -4,11 +4,11 @@ [metabase [query-processor :as qp] [util :as u]] - [metabase.models - [field-values :as field-values] - [table :refer [Table]]] + [metabase.models.table :refer [Table]] [metabase.query-processor.interface :as qpi] [metabase.query-processor.middleware.expand :as ql] + [metabase.util.schema :as su] + [schema.core :as s] [toucan.db :as db])) (defn- qp-query [db-id query] @@ -42,15 +42,32 @@ (u/pprint-to-str results)) (throw e))))) -(defn field-distinct-values +(def ^:private ^Integer absolute-max-distinct-values-limit + "The absolute maximum number of results to return for a `field-distinct-values` query. Normally Fields with 100 or + less values (at the time of this writing) get marked as `auto-list` Fields, meaning we save all their distinct + values in a FieldValues object, which powers a list widget in the FE when using the Field for filtering in the QB. + Admins can however manually mark any Field as `list`, which is effectively ordering Metabase to keep FieldValues for + the Field regardless of its cardinality. + + Of course, if a User does something crazy, like mark a million-arity Field as List, we don't want Metabase to + explode trying to make their dreams a reality; we need some sort of hard limit to prevent catastrophes. So this + limit is effectively a safety to prevent Users from nuking their own instance for Fields that really shouldn't be + List Fields at all. For these very-high-cardinality Fields, we're effectively capping the number of + FieldValues that get could saved. 
+ + This number should be a balance of: + + * Not being too low, which would definitly result in GitHub issues along the lines of 'My 500-distinct-value Field + that I marked as List is not showing all values in the List Widget' + * Not being too high, which would result in Metabase running out of memory dealing with too many values" + (int 5000)) + +(s/defn field-distinct-values "Return the distinct values of FIELD. This is used to create a `FieldValues` object for `:type/Category` Fields." ([field] - ;; fetch up to one more value than allowed for FieldValues. e.g. if the max is 100 distinct values fetch up to 101. - ;; That way we will know if we're over the limit - (field-distinct-values field (inc field-values/list-cardinality-threshold))) - ([field max-results] - {:pre [(integer? max-results)]} + (field-distinct-values field absolute-max-distinct-values-limit)) + ([field, max-results :- su/IntGreaterThanZero] (mapv first (field-query field (-> {} (ql/breakout (ql/field-id (u/get-id field))) (ql/limit max-results)))))) diff --git a/src/metabase/driver/FixedHiveConnection.clj b/src/metabase/driver/FixedHiveConnection.clj deleted file mode 100644 index 6a06958e77514861d0cb8f5eaec6c4784276850a..0000000000000000000000000000000000000000 --- a/src/metabase/driver/FixedHiveConnection.clj +++ /dev/null @@ -1,26 +0,0 @@ -(ns metabase.driver.FixedHiveConnection - (:import [org.apache.hive.jdbc HiveConnection] - [java.sql ResultSet SQLException] - java.util.Properties) - (:gen-class - :extends org.apache.hive.jdbc.HiveConnection - :init init - :constructors {[String java.util.Properties] [String java.util.Properties]})) - -(defn -init - "Initializes the connection" - [uri properties] - [[uri properties] nil]) - -(defn -getHoldability - "Returns the holdability setting for this JDBC driver" - [^org.apache.hive.jdbc.HiveConnection this] - ResultSet/CLOSE_CURSORS_AT_COMMIT) - -(defn -setReadOnly - "Sets this connection to read only" - [^org.apache.hive.jdbc.HiveConnection 
this read-only?] - (when (.isClosed this) - (throw (SQLException. "Connection is closed"))) - (when read-only? - (throw (SQLException. "Enabling read-only mode is not supported")))) diff --git a/src/metabase/driver/FixedHiveDriver.clj b/src/metabase/driver/FixedHiveDriver.clj deleted file mode 100644 index b477ab10bb0b5dae26ed7a59346d17b99b718a4d..0000000000000000000000000000000000000000 --- a/src/metabase/driver/FixedHiveDriver.clj +++ /dev/null @@ -1,19 +0,0 @@ -(ns metabase.driver.FixedHiveDriver - (:import [org.apache.hive.jdbc HiveDriver] - java.util.Properties) - (:gen-class - :extends org.apache.hive.jdbc.HiveDriver - :init init - :prefix "driver-" - :constructors {[] []})) - -(defn driver-init - "Initializes the Hive driver, fixed to work with Metabase" - [] - [[] nil]) - -(defn driver-connect - "Connects to a Hive compatible database" - [^org.apache.hive.jdbc.HiveDriver this ^String url ^java.util.Properties info] - (when (.acceptsURL this url) - (clojure.lang.Reflector/invokeConstructor (Class/forName "metabase.driver.FixedHiveConnection") (to-array [url info])))) diff --git a/src/metabase/driver/sparksql.clj b/src/metabase/driver/sparksql.clj index 0e15195ae021d1cdfc13a32172a02835a4c4c614..fd281834fb76d526035ef4a54f3f3bfdd8d8f0cd 100644 --- a/src/metabase/driver/sparksql.clj +++ b/src/metabase/driver/sparksql.clj @@ -3,6 +3,7 @@ [set :as set] [string :as s]] [clojure.java.jdbc :as jdbc] + [clojure.tools.logging :as log] [honeysql [core :as hsql] [helpers :as h]] @@ -15,7 +16,8 @@ [hive-like :as hive-like]] [metabase.driver.generic-sql.query-processor :as sqlqp] [metabase.query-processor.util :as qputil] - [metabase.util.honeysql-extensions :as hx]) + [metabase.util.honeysql-extensions :as hx] + [puppetlabs.i18n.core :refer [trs]]) (:import clojure.lang.Reflector java.sql.DriverManager metabase.query_processor.interface.Field)) @@ -94,23 +96,6 @@ [{:keys [host port db jdbc-flags] :or {host "localhost", port 10000, db "", jdbc-flags ""} :as opts}] - ;; 
manually register our FixedHiveDriver with java.sql.DriverManager and make sure it's the only driver returned for - ;; jdbc:hive2, since we do not want to use the driver registered by the super class of our FixedHiveDriver. - ;; - ;; Class/forName and invokeConstructor is required to make this compile, but it may be possible to solve this with - ;; the right project.clj magic - (DriverManager/registerDriver - (Reflector/invokeConstructor - (Class/forName "metabase.driver.FixedHiveDriver") - (into-array []))) - (loop [] - (when-let [driver (try - (DriverManager/getDriver "jdbc:hive2://localhost:10000") - (catch java.sql.SQLException _ - nil))] - (when-not (instance? (Class/forName "metabase.driver.FixedHiveDriver") driver) - (DriverManager/deregisterDriver driver) - (recur)))) (merge {:classname "metabase.driver.FixedHiveDriver" :subprotocol "hive2" :subname (str "//" host ":" port "/" db jdbc-flags)} @@ -223,4 +208,39 @@ :string-length-fn (u/drop-first-arg hive-like/string-length-fn) :unix-timestamp->timestamp (u/drop-first-arg hive-like/unix-timestamp->timestamp)})) -(driver/register-driver! :sparksql (SparkSQLDriver.)) +(defn- register-hive-jdbc-driver! [& {:keys [remaining-tries], :or {remaining-tries 5}}] + ;; manually register our FixedHiveDriver with java.sql.DriverManager + (DriverManager/registerDriver + (Reflector/invokeConstructor + (Class/forName "metabase.driver.FixedHiveDriver") + (into-array []))) + ;; now make sure it's the only driver returned + ;; for jdbc:hive2, since we do not want to use the driver registered by the super class of our FixedHiveDriver. + (when-let [driver (u/ignore-exceptions + (DriverManager/getDriver "jdbc:hive2://localhost:10000"))] + (let [registered? (instance? (Class/forName "metabase.driver.FixedHiveDriver") driver)] + (cond + registered? + true + + ;; if it's not the registered driver, deregister the current driver (if applicable) and try a couple more times + ;; before giving up :( + (and (not registered?) 
+ (> remaining-tries 0)) + (do + (when driver + (DriverManager/deregisterDriver driver)) + (recur {:remaining-tries (dec remaining-tries)})) + + :else + (log/error + (trs "Error: metabase.driver.FixedHiveDriver is registered, but JDBC does not seem to be using it.")))))) + +(defn -init-driver + "Register the SparkSQL driver if the SparkSQL dependencies are available." + [] + (when (u/ignore-exceptions (Class/forName "metabase.driver.FixedHiveDriver")) + (log/info (trs "Found metabase.driver.FixedHiveDriver.")) + (when (u/ignore-exceptions (register-hive-jdbc-driver!)) + (log/info (trs "Successfully registered metabase.driver.FixedHiveDriver with JDBC.")) + (driver/register-driver! :sparksql (SparkSQLDriver.))))) diff --git a/src/metabase/models/field.clj b/src/metabase/models/field.clj index 51cdcbb016cf0985f94ed7f11fbe87ea4509f70a..09ae4b590779315749401fab31743bd0b81500d0 100644 --- a/src/metabase/models/field.clj +++ b/src/metabase/models/field.clj @@ -15,7 +15,7 @@ ;;; ------------------------------------------------- Type Mappings -------------------------------------------------- -(def ^:const visibility-types +(def visibility-types "Possible values for `Field.visibility_type`." #{:normal ; Default setting. field has no visibility restrictions. :details-only ; For long blob like columns such as JSON. field is not shown in some places on the frontend. @@ -23,6 +23,37 @@ :sensitive ; Strict removal of field from all places except data model listing. queries should error if someone attempts to access. :retired}) ; For fields that no longer exist in the physical db. automatically set by Metabase. QP should error if encountered in a query. +(def has-field-values-options + "Possible options for `has_field_values`. This column is used to determine whether we keep FieldValues for a Field, + and which type of widget should be used to pick values of this Field when filtering by it in the Query Builder." 
+ ;; AUTOMATICALLY-SET VALUES, SET DURING SYNC + ;; + ;; `nil` -- means infer which widget to use based on logic in `with-has-field-values`; this will either return + ;; `:search` or `:none`. + ;; + ;; This is the default state for Fields not marked `auto-list`. Admins cannot explicitly mark a Field as + ;; `has_field_values` `nil`. This value is also subject to automatically change in the future if the values of a + ;; Field change in such a way that it can now be marked `auto-list`. Fields marked `nil` do *not* have FieldValues + ;; objects. + ;; + #{;; The other automatically-set option. Automatically marked as a 'List' Field based on cardinality and other factors + ;; during sync. Store a FieldValues object; use the List Widget. If this Field goes over the distinct value + ;; threshold in a future sync, the Field will get switched back to `has_field_values = nil`. + :auto-list + ;; + ;; EXPLICITLY-SET VALUES, SET BY AN ADMIN + ;; + ;; Admin explicitly marked this as a 'Search' Field, which means we should *not* keep FieldValues, and should use + ;; Search Widget. + :search + ;; Admin explicitly marked this as a 'List' Field, which means we should keep FieldValues, and use the List + ;; Widget. Unlike `auto-list`, if this Field grows past the normal cardinality constraints in the future, it will + ;; remain `List` until explicitly marked otherwise. + :list + ;; Admin explicitly marked that this Field shall always have a plain-text widget, neither allowing search, nor + ;; showing a list of possible values. FieldValues not kept. + :none}) + ;;; ----------------------------------------------- Entity & Lifecycle ----------------------------------------------- @@ -97,7 +128,7 @@ :special_type :keyword :visibility_type :keyword :description :clob - :has_field_values :clob + :has_field_values :keyword :fingerprint :json}) :properties (constantly {:timestamped? 
true}) :pre-insert pre-insert @@ -149,7 +180,7 @@ {:batched-hydrate :normal_values} [fields] (let [id->field-values (select-field-id->instance (filter fv/field-should-have-field-values? fields) - [FieldValues :id :human_readable_values :values :field_id])] + [FieldValues :id :human_readable_values :values :field_id])] (for [field fields] (assoc field :values (get id->field-values (:id field) []))))) @@ -175,30 +206,30 @@ (or (isa? base-type :type/Text) (isa? base-type :type/TextLike))) -(defn with-has-field-values - "Infer what the value of the `has_field_values` should be for Fields where it's not set. Admins can set this to one - of the values below, but if it's `nil` in the DB we'll infer it automatically. +(defn- infer-has-field-values + "Determine the value of `has_field_values` we should return for a `Field` As of 0.29.1 this doesn't require any DB + calls! :D" + [{has-field-values :has_field_values, :as field}] + (or + ;; if `has_field_values` is set in the DB, use that value; but if it's `auto-list`, return the value as `list` to + ;; avoid confusing FE code, which can remain blissfully unaware that `auto-list` is a thing + (when has-field-values + (if (= (keyword has-field-values) :auto-list) + :list + has-field-values)) + ;; otherwise if it does not have value set in DB we will infer it + (if (is-searchable? field) + :search + :none))) - * `list` = has an associated FieldValues object - * `search` = does not have FieldValues - * `none` = admin has explicitly disabled search behavior for this Field" +(defn with-has-field-values + "Infer what the value of the `has_field_values` should be for Fields where it's not set. See documentation for + `has-field-values-options` above for a more detailed explanation of what these values mean." {:batched-hydrate :has_field_values} [fields] - (let [fields-without-has-field-values-ids (set (for [field fields - :when (nil? 
(:has_field_values field))] - (:id field))) - fields-with-fieldvalues-ids (when (seq fields-without-has-field-values-ids) - (db/select-field :field_id FieldValues - :field_id [:in fields-without-has-field-values-ids]))] - (for [field fields] - (when field - (assoc field - :has_field_values (or - (:has_field_values field) - (cond - (contains? fields-with-fieldvalues-ids (u/get-id field)) :list - (is-searchable? field) :search - :else :none))))))) + (for [field fields] + (when field + (assoc field :has_field_values (infer-has-field-values field))))) (defn readable-fields-only "Efficiently checks if each field is readable and returns only readable fields" diff --git a/src/metabase/models/field_values.clj b/src/metabase/models/field_values.clj index 703435d1293ea62eaf164dfe65034aed42bb5ce7..3f425ee713bbee014f5a16859627aba164eec1f7 100644 --- a/src/metabase/models/field_values.clj +++ b/src/metabase/models/field_values.clj @@ -1,6 +1,9 @@ (ns metabase.models.field-values (:require [clojure.tools.logging :as log] [metabase.util :as u] + [metabase.util.schema :as su] + [puppetlabs.i18n.core :refer [trs]] + [schema.core :as s] [toucan [db :as db] [models :as models]])) @@ -11,9 +14,9 @@ frontend behavior such as widget choices." (int 30)) -(def ^:const ^Integer list-cardinality-threshold - "Fields with less than this many distincy values should be given a `has_field_values` value of `list`, which means the - Field should have FieldValues." +(def ^:const ^Integer auto-list-cardinality-threshold + "Fields with less than this many distincy values should be given a `has_field_values` value of `list`, which means + the Field should have FieldValues." (int 100)) (def ^:private ^:const ^Integer entry-max-length @@ -22,7 +25,7 @@ (def ^:private ^:const ^Integer total-max-length "Maximum total length for a `FieldValues` entry (combined length of all values for the field)." 
- (int (* list-cardinality-threshold entry-max-length))) + (int (* auto-list-cardinality-threshold entry-max-length))) ;; ## Entity + DB Multimethods @@ -39,16 +42,18 @@ ;; ## `FieldValues` Helper Functions -(defn field-should-have-field-values? - "Should this `Field` be backed by a corresponding `FieldValues` object?" +(s/defn field-should-have-field-values? :- s/Bool + "Should this `field` be backed by a corresponding `FieldValues` object?" {:arglists '([field])} - [{base-type :base_type, visibility-type :visibility_type, has-field-values :has_field_values, :as field}] - {:pre [visibility-type - (contains? field :base_type) - (contains? field :has_field_values)]} - (and (not (contains? #{:retired :sensitive :hidden :details-only} (keyword visibility-type))) - (not (isa? (keyword base-type) :type/DateTime)) - (= has-field-values "list"))) + [{base-type :base_type, visibility-type :visibility_type, has-field-values :has_field_values, :as field} + :- {:visibility_type su/KeywordOrString + :base_type (s/maybe su/KeywordOrString) + :has_field_values (s/maybe su/KeywordOrString) + s/Keyword s/Any}] + (boolean + (and (not (contains? #{:retired :sensitive :hidden :details-only} (keyword visibility-type))) + (not (isa? (keyword base-type) :type/DateTime)) + (#{:list :auto-list} (keyword has-field-values))))) (defn- values-less-than-total-max-length? @@ -63,32 +68,19 @@ "FieldValues are allowed for this Field." "FieldValues are NOT allowed for this Field."))))) -(defn- cardinality-less-than-threshold? - "`true` if the number of DISTINCT-VALUES is less that `list-cardinality-threshold`. - Does some logging as well." - [distinct-values] - (let [num-values (count distinct-values)] - (u/prog1 (<= num-values list-cardinality-threshold) - (log/debug (if <> - (format "Field has %d distinct values (max %d). FieldValues are allowed for this Field." - num-values list-cardinality-threshold) - (format "Field has over %d values. FieldValues are NOT allowed for this Field." 
- list-cardinality-threshold)))))) - (defn- distinct-values - "Fetch a sequence of distinct values for FIELD that are below the `total-max-length` threshold. If the values are past - the threshold, this returns `nil`." + "Fetch a sequence of distinct values for `field` that are below the `total-max-length` threshold. If the values are + past the threshold, this returns `nil`." [field] (require 'metabase.db.metadata-queries) (let [values ((resolve 'metabase.db.metadata-queries/field-distinct-values) field)] - (when (cardinality-less-than-threshold? values) - (when (values-less-than-total-max-length? values) - values)))) + (when (values-less-than-total-max-length? values) + values))) (defn- fixup-human-readable-values "Field values and human readable values are lists that are zipped together. If the field values have changes, the - human readable values will need to change too. This function reconstructs the human_readable_values to reflect + human readable values will need to change too. This function reconstructs the `human_readable_values` to reflect `NEW-VALUES`. If a new field value is found, a string version of that is used" [{old-values :values, old-hrv :human_readable_values} new-values] (when (seq old-hrv) @@ -96,24 +88,41 @@ (map #(get orig-remappings % (str %)) new-values)))) (defn create-or-update-field-values! - "Create or update the FieldValues object for FIELD. If the FieldValues object already exists, then update values for + "Create or update the FieldValues object for `field`. If the FieldValues object already exists, then update values for it; otherwise create a new FieldValues object with the newly fetched values." [field & [human-readable-values]] (let [field-values (FieldValues :field_id (u/get-id field)) values (distinct-values field) field-name (or (:name field) (:id field))] (cond + ;; If this Field is marked `auto-list`, and the number of values in now over the list threshold, we need to + ;; unmark it as `auto-list`. 
Switch it to `has_field_values` = `nil` and delete the FieldValues; this will + ;; result in it getting a Search Widget in the UI when `has_field_values` is automatically inferred by the + ;; `metabase.models.field/infer-has-field-values` hydration function (see that namespace for more detailed + ;; discussion) + ;; + ;; It would be nicer if we could do this in analysis where it gets marked `:auto-list` in the first place, but + ;; Fingerprints don't get updated regularly enough that we could detect the sudden increase in cardinality in a + ;; way that could make this work. Thus, we are stuck doing it here :( + (and (> (count values) auto-list-cardinality-threshold) + (= :auto-list (keyword (:has_field_values field)))) + (do + (log/info (trs "Field {0} was previously automatically set to show a list widget, but now has {1} values." + field-name (count values)) + (trs "Switching Field to use a search widget instead.")) + (db/update! 'Field (u/get-id field) :has_field_values nil) + (db/delete! FieldValues :field_id (u/get-id field))) ;; if the FieldValues object already exists then update values in it (and field-values values) (do - (log/debug (format "Storing updated FieldValues for Field %s..." field-name)) + (log/debug (trs "Storing updated FieldValues for Field {0}..." field-name)) (db/update-non-nil-keys! FieldValues (u/get-id field-values) :values values :human_readable_values (fixup-human-readable-values field-values values))) ;; if FieldValues object doesn't exist create one values (do - (log/debug (format "Storing FieldValues for Field %s..." field-name)) + (log/debug (trs "Storing FieldValues for Field {0}..." field-name)) (db/insert! 
FieldValues :field_id (u/get-id field) :values values diff --git a/src/metabase/models/interface.clj b/src/metabase/models/interface.clj index 17e47a64b0b052e7784b726d562dd121f9b8a1a7..1e075ee6f8969234b177a1a9801df02ca2d5aeb3 100644 --- a/src/metabase/models/interface.clj +++ b/src/metabase/models/interface.clj @@ -8,7 +8,9 @@ [encryption :as encryption]] [schema.core :as s] [taoensso.nippy :as nippy] - [toucan.models :as models]) + [toucan + [models :as models] + [util :as toucan-util]]) (:import java.sql.Blob)) ;;; +----------------------------------------------------------------------------------------------------------------+ @@ -79,6 +81,13 @@ :in validate-cron-string :out identity) +;; Toucan ships with a Keyword type, but on columns that are marked 'TEXT' it doesn't work properly since the values +;; might need to get de-CLOB-bered first. So replace the default Toucan `:keyword` implementation with one that +;; handles those cases. +(models/add-type! :keyword + :in toucan-util/keyword->qualified-name + :out (comp keyword u/jdbc-clob->str)) + ;;; properties diff --git a/src/metabase/models/params.clj b/src/metabase/models/params.clj index 19390eb6e02d6e6ae0b5c2d70e1f039ca11e82df..ce540903d2c7be95ebe40ab8049abf9ac2ef05ed 100644 --- a/src/metabase/models/params.clj +++ b/src/metabase/models/params.clj @@ -69,9 +69,13 @@ cases where more than one name Field exists for a Table, this just adds the first one it finds." [fields] (when-let [table-ids (seq (map :table_id fields))] - (u/key-by :table_id (db/select Field:params-columns-only - :table_id [:in table-ids] - :special_type (mdb/isa :type/Name))))) + (u/key-by :table_id (-> (db/select Field:params-columns-only + :table_id [:in table-ids] + :special_type (mdb/isa :type/Name)) + ;; run `metabase.models.field/infer-has-field-values` on these Fields so their values of + ;; `has_field_values` will be consistent with what the FE expects. (e.g. we'll return + ;; `list` instead of `auto-list`.) 
+ (hydrate :has_field_values))))) (defn add-name-field "For all `fields` that are `:type/PK` Fields, look for a `:type/Name` Field belonging to the same Table. For each diff --git a/src/metabase/plugins.clj b/src/metabase/plugins.clj index 65ecef26c522823b4a87296022cf8ae5a62019be..3b5b4733c6a926b7ecb2e6d656a193bf14cfc49c 100644 --- a/src/metabase/plugins.clj +++ b/src/metabase/plugins.clj @@ -1,14 +1,22 @@ (ns metabase.plugins + "The Metabase 'plugins' system automatically adds JARs in the `./plugins/` directory (parallel to `metabase.jar`) to + the classpath at runtime. This works great on Java 7 and 8, but never really worked properly on Java 9; as of 0.29.4 + we're planning on telling people to just add extra external dependencies with `-cp` when using Java 9." (:require [clojure.java.io :as io] [clojure.tools.logging :as log] [dynapath.util :as dynapath] [metabase [config :as config] - [util :as u]]) - (:import [java.net URL URLClassLoader])) + [util :as u]] + [puppetlabs.i18n.core :refer [trs]])) + +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; | Java 7 + 8 | +;;; +----------------------------------------------------------------------------------------------------------------+ (defn- plugins-dir - "The Metabase plugins directory. This defaults to `plugins/` in the same directory as `metabase.jar`, but can be configured via the env var `MB_PLUGINS_DIR`." + "The Metabase plugins directory. This defaults to `plugins/` in the same directory as `metabase.jar`, but can be + configured via the env var `MB_PLUGINS_DIR`." ^java.io.File [] (let [dir (io/file (or (config/config-str :mb-plugins-dir) (str (System/getProperty "user.dir") "/plugins")))] @@ -24,15 +32,48 @@ (let [sysloader (ClassLoader/getSystemClassLoader)] (dynapath/add-classpath-url sysloader (.toURL (.toURI jar-file))))) -(defn load-plugins! +(defn dynamically-add-jars! 
"Dynamically add any JARs in the `plugins-dir` to the classpath. - This is used for things like custom plugins or the Oracle JDBC driver, which cannot be shipped alongside Metabase for licensing reasons." + This is used for things like custom plugins or the Oracle JDBC driver, which cannot be shipped alongside Metabase + for licensing reasons." [] (when-let [^java.io.File dir (plugins-dir)] - (log/info (format "Loading plugins in directory %s..." dir)) + (log/info (trs "Loading plugins in directory {0}..." dir)) (doseq [^java.io.File file (.listFiles dir) :when (and (.isFile file) (.canRead file) (re-find #"\.jar$" (.getPath file)))] - (log/info (u/format-color 'magenta "Loading plugin %s... %s" file (u/emoji "🔌"))) + (log/info (u/format-color 'magenta (str (trs "Loading plugin {0}... " file) (u/emoji "🔌")))) (add-jar-to-classpath! file)))) + + +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; | Java 9 | +;;; +----------------------------------------------------------------------------------------------------------------+ + +(defn- show-java-9-message + "Show a message telling people how to add external dependencies on Java 9 if they have some in the `./plugins` + directory." + [] + (when-let [plugins-dir (plugins-dir)] + (when (seq (.listFiles plugins-dir)) + (log/info + (trs "It looks like you have some external dependencies in your Metabase plugins directory.") + (trs "With Java 9 or higher, Metabase cannot automatically add them to your classpath.") + (trs "Instead, you should include them at launch with the -cp option. 
For example:") + "\n\n java -cp metabase.jar:plugins/* metabase.core\n\n" + (trs "See https://metabase.com/docs/latest/operations-guide/start.html#java-versions for more details.") + (trs "(If you're already running Metabase this way, you can ignore this message.)"))))) + + +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; | load-plugins! | +;;; +----------------------------------------------------------------------------------------------------------------+ + +(defn load-plugins! + "Dynamically add JARs if we're running on Java 7 or 8. For Java 9, where this doesn't work, just display a nice + message instead." + [] + (if (u/is-java-9-or-higher?) + (show-java-9-message) + (dynamically-add-jars!))) diff --git a/src/metabase/sync/analyze/classifiers/category.clj b/src/metabase/sync/analyze/classifiers/category.clj index 705d6268cc8a8c37d306bb6af8a35b38ee14b1b5..7c7a49373808884c643bd73ea3899d7de7ee1a80 100644 --- a/src/metabase/sync/analyze/classifiers/category.clj +++ b/src/metabase/sync/analyze/classifiers/category.clj @@ -11,7 +11,9 @@ determined by the cardinality of the Field, like Category status. Thus it is entirely possibly for a Field to be both a Category and a `list` Field." (:require [clojure.tools.logging :as log] - [metabase.models.field-values :as field-values] + [metabase.models + [field :as field] + [field-values :as field-values]] [metabase.sync [interface :as i] [util :as sync-util]] @@ -28,7 +30,8 @@ (isa? special-type :type/PK) (isa? special-type :type/FK))) -(defn- field-should-be-category? [distinct-count field] +(s/defn ^:private field-should-be-category? 
:- (s/maybe s/Bool) + [distinct-count :- s/Int, field :- su/Map] ;; only mark a Field as a Category if it doesn't already have a special type (when-not (:special_type field) (when (<= distinct-count field-values/category-cardinality-threshold) @@ -38,19 +41,20 @@ field-values/category-cardinality-threshold)) true))) -(defn- field-should-be-list? [distinct-count field] - {:pre [(contains? field :has_field_values)]} +(s/defn ^:private field-should-be-auto-list? :- (s/maybe s/Bool) + "Based on `distinct-count`, should we mark this `field` as `has_field_values` = `auto-list`?" + [distinct-count :- s/Int, field :- {:has_field_values (s/maybe (apply s/enum field/has-field-values-options)) + s/Keyword s/Any}] ;; only update has_field_values if it hasn't been set yet. If it's already been set then it was probably done so ;; manually by an admin, and we don't want to stomp over their choices. (when (nil? (:has_field_values field)) - (when (<= distinct-count field-values/list-cardinality-threshold) + (when (<= distinct-count field-values/auto-list-cardinality-threshold) (log/debug (format "%s has %d distinct values. Since that is less than %d, it should have cached FieldValues." (sync-util/name-for-logging field) distinct-count - field-values/list-cardinality-threshold)) + field-values/auto-list-cardinality-threshold)) true))) - (s/defn infer-is-category-or-list :- (s/maybe i/FieldInstance) "Classifier that attempts to determine whether FIELD ought to be marked as a Category based on its distinct count." [field :- i/FieldInstance, fingerprint :- (s/maybe i/Fingerprint)] @@ -58,5 +62,5 @@ (when-not (cannot-be-category-or-list? (:base_type field) (:special_type field)) (when-let [distinct-count (get-in fingerprint [:global :distinct-count])] (cond-> field - (field-should-be-category? distinct-count field) (assoc :special_type :type/Category) - (field-should-be-list? distinct-count field) (assoc :has_field_values "list")))))) + (field-should-be-category? 
distinct-count field) (assoc :special_type :type/Category) + (field-should-be-auto-list? distinct-count field) (assoc :has_field_values :auto-list)))))) diff --git a/src/metabase/sync/analyze/fingerprint/global.clj b/src/metabase/sync/analyze/fingerprint/global.clj index 3fa55fd999ce3cf03b3f9346c86d9358553d2449..81b1252df06bfc7e80203e58002f429da09d94d3 100644 --- a/src/metabase/sync/analyze/fingerprint/global.clj +++ b/src/metabase/sync/analyze/fingerprint/global.clj @@ -7,7 +7,7 @@ "Generate a fingerprint of global information for Fields of all types." [values :- i/FieldSample] ;; TODO - this logic isn't as nice as the old logic that actually called the DB - ;; We used to do (queries/field-distinct-count field field-values/list-cardinality-threshold) + ;; We used to do (queries/field-distinct-count field field-values/auto-list-cardinality-threshold) ;; Consider whether we are so married to the idea of only generating fingerprints from samples that we ;; are ok with inaccurate counts like the one we'll surely be getting here {:distinct-count (count (distinct values))}) diff --git a/src/metabase/util.clj b/src/metabase/util.clj index 3597631b3704717a27dce9ed2106733ef528598b..8954c67e60065797969d67614d27e7ea8f5b2e7c 100644 --- a/src/metabase/util.clj +++ b/src/metabase/util.clj @@ -598,3 +598,13 @@ (first (keep-indexed (fn [i x] (when (pred x) i)) coll))) + + +(defn is-java-9-or-higher? + "Are we running on Java 9 or above?" 
+ [] + (when-let [java-major-version (some-> (System/getProperty "java.version") + (s/split #"\.") + first + Integer/parseInt)] + (>= java-major-version 9))) diff --git a/test/metabase/automagic_dashboards/core_test.clj b/test/metabase/automagic_dashboards/core_test.clj index 8e79c3634f5861a50725a5d4ef97da3edd7c3f72..6b57bfd03aa0ac9bb14ee7eca4629e530fccbb08 100644 --- a/test/metabase/automagic_dashboards/core_test.clj +++ b/test/metabase/automagic_dashboards/core_test.clj @@ -50,9 +50,20 @@ (->> (data/id :users) Table (#'magic/->root) - (#'magic/matching-rules (rules/load-rules "table")) + (#'magic/matching-rules (rules/get-rules ["table"])) (map (comp first :applies_to)))) +;; Test fallback to GenericTable +(expect + [:entity/GenericTable :entity/*] + (->> (-> (data/id :users) + Table + (assoc :entity_type nil) + (#'magic/->root)) + (#'magic/matching-rules (rules/get-rules ["table"])) + (map (comp first :applies_to)))) + + (defn- collect-urls [dashboard] (->> dashboard @@ -195,34 +206,20 @@ (expect - [true true] - (tt/with-temp* [Table [{table-id :id}]] - (with-rasta - (with-dashboard-cleanup - (let [table (Table table-id) - not-analyzed-result (automagic-analysis table {}) - analyzed-result (-> table - (assoc :entity_type :entity/GenericTable) - (automagic-analysis {}))] - [(nil? not-analyzed-result) (some? analyzed-result)]))))) + 3 + (with-rasta + (->> (Database (data/id)) candidate-tables first :tables count))) +;; /candidates should work with unanalyzed tables (expect + 1 (tt/with-temp* [Database [{db-id :id}] Table [{table-id :id} {:db_id db-id}] Field [{} {:table_id table-id}] Field [{} {:table_id table-id}]] (with-rasta (with-dashboard-cleanup - (let [database (Database db-id) - not-analyzed-count (count (candidate-tables database))] - (db/update! 
Table table-id :entity_type :entity/GenericTable) - (= (inc not-analyzed-count) (count (candidate-tables database)))))))) - - -(expect - 3 - (with-rasta - (->> (Database (data/id)) candidate-tables first :tables count))) + (count (candidate-tables (Database db-id))))))) ;; Identity diff --git a/test/metabase/automagic_dashboards/rules_test.clj b/test/metabase/automagic_dashboards/rules_test.clj index 67dffe23eca2f9dd1fe08aa46ff747719797e964..962e9428256c35bc4c9bd2df500253dec7eab0fd 100644 --- a/test/metabase/automagic_dashboards/rules_test.clj +++ b/test/metabase/automagic_dashboards/rules_test.clj @@ -14,9 +14,12 @@ (expect "ga:foo" (#'rules/->type "ga:foo")) (expect :type/Foo (#'rules/->type "Foo")) -(expect (every? some? (load-rules "table"))) -(expect (every? some? (load-rules "metrics"))) -(expect (every? some? (load-rules "fields"))) +;; This also tests that all the rules are valid (else there would be nils returned) +(expect (every? some? (get-rules ["table"]))) +(expect (every? some? (get-rules ["metrics"]))) +(expect (every? some? (get-rules ["fields"]))) + +(expect (some? (get-rules ["table" "GenericTable" "ByCountry"]))) (expect true (dimension-form? [:dimension "Foo"])) (expect true (dimension-form? ["dimension" "Foo"])) diff --git a/test/metabase/models/field_values_test.clj b/test/metabase/models/field_values_test.clj index fb1e94c0d67a97b88a2664ccbd79cc0974ef82a7..e00c0525bee4701b18649340dee82185f4dbd876 100644 --- a/test/metabase/models/field_values_test.clj +++ b/test/metabase/models/field_values_test.clj @@ -16,19 +16,19 @@ ;;; ---------------------------------------- field-should-have-field-values? ----------------------------------------- -(expect (field-should-have-field-values? {:has_field_values "list" +(expect (field-should-have-field-values? {:has_field_values :list :visibility_type :normal :base_type :type/Text})) -(expect false (field-should-have-field-values? {:has_field_values "list" +(expect false (field-should-have-field-values? 
{:has_field_values :list :visibility_type :sensitive :base_type :type/Text})) -(expect false (field-should-have-field-values? {:has_field_values "list" +(expect false (field-should-have-field-values? {:has_field_values :list :visibility_type :hidden :base_type :type/Text})) -(expect false (field-should-have-field-values? {:has_field_values "list" +(expect false (field-should-have-field-values? {:has_field_values :list :visibility_type :details-only :base_type :type/Text})) @@ -36,11 +36,11 @@ :visibility_type :normal :base_type :type/Text})) -(expect (field-should-have-field-values? {:has_field_values "list" +(expect (field-should-have-field-values? {:has_field_values :list :visibility_type :normal :base_type :type/Text})) -(expect (field-should-have-field-values? {:has_field_values "list" +(expect (field-should-have-field-values? {:has_field_values :list :special_type :type/Category :visibility_type :normal :base_type "type/Boolean"})) @@ -48,32 +48,32 @@ ;; retired/sensitive/hidden/details-only fields should always be excluded (expect false (field-should-have-field-values? {:base_type :type/Boolean - :has_field_values "list" + :has_field_values :list :visibility_type :retired})) (expect false (field-should-have-field-values? {:base_type :type/Boolean - :has_field_values "list" + :has_field_values :list :visibility_type :sensitive})) (expect false (field-should-have-field-values? {:base_type :type/Boolean - :has_field_values "list" + :has_field_values :list :visibility_type :hidden})) (expect false (field-should-have-field-values? {:base_type :type/Boolean - :has_field_values "list" + :has_field_values :list :visibility_type :details-only})) ;; date/time based fields should always be excluded (expect false (field-should-have-field-values? {:base_type :type/Date - :has_field_values "list" + :has_field_values :list :visibility_type :normal})) (expect false (field-should-have-field-values? 
{:base_type :type/DateTime - :has_field_values "list" + :has_field_values :list :visibility_type :normal})) (expect false (field-should-have-field-values? {:base_type :type/Time - :has_field_values "list" + :has_field_values :list :visibility_type :normal})) diff --git a/test/metabase/models/params_test.clj b/test/metabase/models/params_test.clj index 743fd0d451850336c5cc42c2d4d1348975c3f0b5..c6c9e45898cc934de1d8377ccd96fd35214aa099 100644 --- a/test/metabase/models/params_test.clj +++ b/test/metabase/models/params_test.clj @@ -4,8 +4,7 @@ [metabase.api.public-test :as public-test] [metabase.models [card :refer [Card]] - [field :refer [Field]] - [params :as params]] + [field :refer [Field]]] [metabase.test.data :as data] [toucan [db :as db] @@ -24,7 +23,7 @@ :display_name "Name" :base_type :type/Text :special_type :type/Name - :has_field_values "list"}} + :has_field_values :list}} (-> (db/select-one [Field :name :table_id :special_type], :id (data/id :venues :id)) (hydrate :name_field))) @@ -70,7 +69,7 @@ :display_name "Name" :base_type :type/Text :special_type :type/Name - :has_field_values "list"} + :has_field_values :list} :dimensions []}} (tt/with-temp Card [card {:dataset_query {:database (data/id) @@ -96,7 +95,7 @@ :display_name "Name" :base_type :type/Text :special_type :type/Name - :has_field_values "list"} + :has_field_values :list} :dimensions []}} (public-test/with-sharing-enabled-and-temp-dashcard-referencing :venues :id [dashboard] (-> (hydrate dashboard :param_fields) diff --git a/test/metabase/sync/analyze/classifiers/category_test.clj b/test/metabase/sync/analyze/classifiers/category_test.clj index f258bdbd2a2246558794220c1f7d1f2ac1880cfa..96b6819fab128bc926b08f03d9e712da0bb87248 100644 --- a/test/metabase/sync/analyze/classifiers/category_test.clj +++ b/test/metabase/sync/analyze/classifiers/category_test.clj @@ -3,25 +3,34 @@ (:require [expectations :refer :all] [metabase.sync.analyze.classifiers.category :as category-classifier])) +(defn- 
field-with-distinct-count [distinct-count] + {:database_type "VARCHAR" + :special_type :type/Name + :name "NAME" + :fingerprint_version 1 + :has_field_values nil + :active true + :visibility_type :normal + :preview_display true + :display_name "Name" + :fingerprint {:global {:distinct-count distinct-count} + :type + {:type/Text + {:percent-json 0.0 + :percent-url 0.0 + :percent-email 0.0 + :average-length 13.516}}} + :base_type :type/Text}) + ;; make sure the logic for deciding whether a Field should be a list works as expected (expect nil - (#'category-classifier/field-should-be-list? + (#'category-classifier/field-should-be-auto-list? 2500 - {:database_type "VARCHAR" - :special_type :type/Name - :name "NAME" - :fingerprint_version 1 - :has_field_values nil - :active true - :visibility_type :normal - :preview_display true - :display_name "Name" - :fingerprint {:global {:distinct-count 2500} - :type - {:type/Text - {:percent-json 0.0 - :percent-url 0.0 - :percent-email 0.0 - :average-length 13.516}}} - :base_type :type/Text})) + (field-with-distinct-count 2500))) + +(expect + true + (#'category-classifier/field-should-be-auto-list? + 99 + (field-with-distinct-count 99))) diff --git a/test/metabase/sync/field_values_test.clj b/test/metabase/sync/field_values_test.clj new file mode 100644 index 0000000000000000000000000000000000000000..96678cac862829148beadb27c783f3a38b63dd1e --- /dev/null +++ b/test/metabase/sync/field_values_test.clj @@ -0,0 +1,127 @@ +(ns metabase.sync.field-values-test + "Tests around the way Metabase syncs FieldValues, and sets the values of `field.has_field_values`." 
+ (:require [expectations :refer :all] + [metabase + [sync :as sync] + [util :as u]] + [metabase.models + [field :refer [Field]] + [field-values :as field-values :refer [FieldValues]] + [table :refer [Table]]] + [metabase.test.data :as data] + [metabase.test.data.one-off-dbs :as one-off-dbs] + [toucan.db :as db])) + +;; Test that when we delete FieldValues syncing the Table again will cause them to be re-created +(defn- venues-price-field-values [] + (db/select-one-field :values FieldValues, :field_id (data/id :venues :price))) + +(expect + {1 [1 2 3 4] + 2 nil + 3 [1 2 3 4]} + (array-map + ;; 1. Check that we have expected field values to start with + 1 (venues-price-field-values) + ;; 2. Delete the Field values, make sure they're gone + 2 (do (db/delete! FieldValues :field_id (data/id :venues :price)) + (venues-price-field-values)) + ;; 3. Now re-sync the table and make sure they're back + 3 (do (sync/sync-table! (Table (data/id :venues))) + (venues-price-field-values)))) + +;; Test that syncing will cause FieldValues to be updated +(expect + {1 [1 2 3 4] + 2 [1 2 3] + 3 [1 2 3 4]} + (array-map + ;; 1. Check that we have expected field values to start with + 1 (venues-price-field-values) + ;; 2. Update the FieldValues, remove one of the values that should be there + 2 (do (db/update! FieldValues (db/select-one-id FieldValues :field_id (data/id :venues :price)) + :values [1 2 3]) + (venues-price-field-values)) + ;; 3. Now re-sync the table and make sure the value is back + 3 (do (sync/sync-table! (Table (data/id :venues))) + (venues-price-field-values)))) + + +;; A Field with 50 values should get marked as `auto-list` on initial sync, because it should be 'list', but was +;; marked automatically, as opposed to explicitly (`list`) +(expect + :auto-list + (one-off-dbs/with-blueberries-db + ;; insert 50 rows & sync + (one-off-dbs/insert-rows-and-sync! 
(range 50)) + ;; has_field_values should be auto-list + (db/select-one-field :has_field_values Field :id (data/id :blueberries_consumed :num)))) + +;; ... and it should also have some FieldValues +(expect + #metabase.models.field_values.FieldValuesInstance + {:values [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 + 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49] + :human_readable_values {}} + (one-off-dbs/with-blueberries-db + (one-off-dbs/insert-rows-and-sync! (range 50)) + (db/select-one [FieldValues :values :human_readable_values], :field_id (data/id :blueberries_consumed :num)))) + +;; ok, but if the number grows past the threshold & we sync again it should get unmarked as auto-list and set back to +;; `nil` (#3215) +(expect + nil + (one-off-dbs/with-blueberries-db + ;; insert 50 bloobs & sync. has_field_values should be auto-list + (one-off-dbs/insert-rows-and-sync! (range 50)) + (assert (= (db/select-one-field :has_field_values Field :id (data/id :blueberries_consumed :num)) + :auto-list)) + ;; now insert enough bloobs to put us over the limit and re-sync. + (one-off-dbs/insert-rows-and-sync! (range 50 (+ 100 field-values/auto-list-cardinality-threshold))) + ;; has_field_values should have been set to nil. + (db/select-one-field :has_field_values Field :id (data/id :blueberries_consumed :num)))) + +;; ...its FieldValues should also get deleted. +(expect + nil + (one-off-dbs/with-blueberries-db + ;; do the same steps as the test above... + (one-off-dbs/insert-rows-and-sync! (range 50)) + (one-off-dbs/insert-rows-and-sync! (range 50 (+ 100 field-values/auto-list-cardinality-threshold))) + ;; ///and FieldValues should also have been deleted. + (db/select-one [FieldValues :values :human_readable_values], :field_id (data/id :blueberries_consumed :num)))) + +;; If we had explicitly marked the Field as `list` (instead of `auto-list`), adding extra values shouldn't change +;; anything! 
+(expect + :list + (one-off-dbs/with-blueberries-db + ;; insert 50 bloobs & sync + (one-off-dbs/insert-rows-and-sync! (range 50)) + ;; change has_field_values to list + (db/update! Field (data/id :blueberries_consumed :num) :has_field_values "list") + ;; insert more bloobs & re-sync + (one-off-dbs/insert-rows-and-sync! (range 50 (+ 100 field-values/auto-list-cardinality-threshold))) + ;; has_field_values shouldn't change + (db/select-one-field :has_field_values Field :id (data/id :blueberries_consumed :num)))) + +;; it should still have FieldValues, and have new ones for the new Values. It should have 200 values even though this +;; is past the normal limit of 100 values! +(expect + #metabase.models.field_values.FieldValuesInstance + {:values [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 + 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 + 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 + 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 + 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 + 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 + 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 + 189 190 191 192 193 194 195 196 197 198 199] + :human_readable_values {}} + (one-off-dbs/with-blueberries-db + ;; follow the same steps as the test above... + (one-off-dbs/insert-rows-and-sync! (range 50)) + (db/update! Field (data/id :blueberries_consumed :num) :has_field_values "list") + (one-off-dbs/insert-rows-and-sync! (range 50 (+ 100 field-values/auto-list-cardinality-threshold))) + ;; ... and FieldValues should still be there, but this time updated to include the new values! 
+ (db/select-one [FieldValues :values :human_readable_values], :field_id (data/id :blueberries_consumed :num)))) diff --git a/test/metabase/sync/sync_metadata/fields_test.clj b/test/metabase/sync/sync_metadata/fields_test.clj index 2dbde44ddefd8c5a51fb82f6980d66a60bd41136..6cff1212d141c39fb8a3e74df16b5044e41c6dcc 100644 --- a/test/metabase/sync/sync_metadata/fields_test.clj +++ b/test/metabase/sync/sync_metadata/fields_test.clj @@ -2,24 +2,26 @@ "Tests for the logic that syncs Field models with the Metadata fetched from a DB. (There are more tests for this behavior in the namespace `metabase.sync-database.sync-dynamic-test`, which is sort of a misnomer.)" (:require [clojure.java.jdbc :as jdbc] - [expectations :refer [expect]] + [expectations :refer :all] [metabase - [db :as mdb] - [driver :as driver] [query-processor :as qp] [sync :as sync] [util :as u]] - [metabase.driver.generic-sql :as sql] [metabase.models [database :refer [Database]] [field :refer [Field]] [table :refer [Table]]] - [metabase.test.data.interface :as tdi] + [metabase.test.data :as data] + [metabase.test.data.one-off-dbs :as one-off-dbs] [toucan [db :as db] [hydrate :refer [hydrate]]] [toucan.util.test :as tt])) +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; | Dropping & Undropping Columns | +;;; +----------------------------------------------------------------------------------------------------------------+ + (defn- with-test-db-before-and-after-dropping-a-column "Testing function that performs the following steps: @@ -29,40 +31,32 @@ 4. Executes `(f database)` a second time 5. Returns a map containing results from both calls to `f` for comparison." 
[f] - ;; let H2 connect to DBs that aren't created yet - (binding [mdb/*allow-potentailly-unsafe-connections* true] - (let [driver (driver/engine->driver :h2) - details (tdi/database->connection-details driver :db {:db "mem:deleted_columns_test" - :database-name "deleted_columns_test"}) - exec! (fn [& statements] - (doseq [statement statements] - (jdbc/execute! (sql/connection-details->spec driver details) statement)))] - ;; first, create a new in-memory test DB and add some data to it - (exec! - ;; H2 needs that 'guest' user for QP purposes. Set that up - "CREATE USER IF NOT EXISTS GUEST PASSWORD 'guest';" - ;; Keep DB open until we say otherwise :) - "SET DB_CLOSE_DELAY -1;" - ;; create table & load data - "DROP TABLE IF EXISTS \"birds\";" - "CREATE TABLE \"birds\" (\"species\" VARCHAR PRIMARY KEY, \"example_name\" VARCHAR);" - "GRANT ALL ON \"birds\" TO GUEST;" - (str "INSERT INTO \"birds\" (\"species\", \"example_name\") VALUES " - "('House Finch', 'Marshawn Finch'), " - "('California Gull', 'Steven Seagull'), " - "('Chicken', 'Colin Fowl');")) - ;; now create MB models + sync - (tt/with-temp Database [database {:engine :h2, :details details, :name "Deleted Columns Test"}] - (sync/sync-database! database) - ;; ok, let's see what (f) gives us - (let [f-before (f database)] - ;; ok cool! now delete one of those columns... - (exec! "ALTER TABLE \"birds\" DROP COLUMN \"example_name\";") - ;; ...and re-sync... - (sync/sync-database! database) - ;; ...now let's see how (f) may have changed! Compare to original. - {:before-drop f-before - :after-drop (f database)}))))) + ;; first, create a new in-memory test DB and add some data to it + (one-off-dbs/with-blank-db + (doseq [statement [ ;; H2 needs that 'guest' user for QP purposes. 
Set that up + "CREATE USER IF NOT EXISTS GUEST PASSWORD 'guest';" + ;; Keep DB open until we say otherwise :) + "SET DB_CLOSE_DELAY -1;" + ;; create table & load data + "DROP TABLE IF EXISTS \"birds\";" + "CREATE TABLE \"birds\" (\"species\" VARCHAR PRIMARY KEY, \"example_name\" VARCHAR);" + "GRANT ALL ON \"birds\" TO GUEST;" + (str "INSERT INTO \"birds\" (\"species\", \"example_name\") VALUES " + "('House Finch', 'Marshawn Finch'), " + "('California Gull', 'Steven Seagull'), " + "('Chicken', 'Colin Fowl');")]] + (jdbc/execute! one-off-dbs/*conn* [statement])) + ;; now sync + (sync/sync-database! (data/db)) + ;; ok, let's see what (f) gives us + (let [f-before (f (data/db))] + ;; ok cool! now delete one of those columns... + (jdbc/execute! one-off-dbs/*conn* ["ALTER TABLE \"birds\" DROP COLUMN \"example_name\";"]) + ;; ...and re-sync... + (sync/sync-database! (data/db)) + ;; ...now let's see how (f) may have changed! Compare to original. + {:before-drop f-before + :after-drop (f (data/db))}))) ;; make sure sync correctly marks a Field as active = false when it gets dropped from the DB @@ -110,3 +104,217 @@ :data :native_form :query)))) + +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; | PK & FK Syncing | +;;; +----------------------------------------------------------------------------------------------------------------+ + +(defn- force-sync-table! + "Updates the `:fields_hash` to ensure that the sync process will include fields in the sync" + [table] + (db/update! Table (u/get-id table), :fields_hash "something new") + (sync/sync-table! (Table (data/id :venues)))) + +;; Test PK Syncing +(expect [:type/PK + nil + :type/PK + :type/Latitude + :type/PK] + (let [get-special-type (fn [] (db/select-one-field :special_type Field, :id (data/id :venues :id)))] + [;; Special type should be :id to begin with + (get-special-type) + ;; Clear out the special type + (do (db/update! 
Field (data/id :venues :id), :special_type nil) + (get-special-type)) + ;; Calling sync-table! should set the special type again + (do (force-sync-table! (data/id :venues)) + (get-special-type)) + ;; sync-table! should *not* change the special type of fields that are marked with a different type + (do (db/update! Field (data/id :venues :id), :special_type :type/Latitude) + (get-special-type)) + ;; Make sure that sync-table runs set-table-pks-if-needed! + (do (db/update! Field (data/id :venues :id), :special_type nil) + (force-sync-table! (Table (data/id :venues))) + (get-special-type))])) + + +;; Check that Foreign Key relationships were created on sync as we expect +(expect (data/id :venues :id) + (db/select-one-field :fk_target_field_id Field, :id (data/id :checkins :venue_id))) + +(expect (data/id :users :id) + (db/select-one-field :fk_target_field_id Field, :id (data/id :checkins :user_id))) + +(expect (data/id :categories :id) + (db/select-one-field :fk_target_field_id Field, :id (data/id :venues :category_id))) + +;; Check that sync-table! causes FKs to be set like we'd expect +(expect [{:special_type :type/FK, :fk_target_field_id true} + {:special_type nil, :fk_target_field_id false} + {:special_type :type/FK, :fk_target_field_id true}] + (let [field-id (data/id :checkins :user_id) + get-special-type-and-fk-exists? (fn [] + (into {} (-> (db/select-one [Field :special_type :fk_target_field_id], + :id field-id) + (update :fk_target_field_id #(db/exists? Field :id %)))))] + [ ;; FK should exist to start with + (get-special-type-and-fk-exists?) + ;; Clear out FK / special_type + (do (db/update! Field field-id, :special_type nil, :fk_target_field_id nil) + (get-special-type-and-fk-exists?)) + ;; Run sync-table and they should be set again + (let [table (Table (data/id :checkins))] + (sync/sync-table! 
table) + (get-special-type-and-fk-exists?))])) + + +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; | mystery narrow-to-min-max test | +;;; +----------------------------------------------------------------------------------------------------------------+ + +;; TODO - hey, what is this testing? If you wrote this test, please explain what's going on here +(defn- narrow-to-min-max [row] + (-> row + (get-in [:type :type/Number]) + (select-keys [:min :max]) + (update :min #(u/round-to-decimals 4 %)) + (update :max #(u/round-to-decimals 4 %)))) + +(expect + [{:min -165.374 :max -73.9533} + {:min 10.0646 :max 40.7794}] + (tt/with-temp* [Database [database {:details (:details (Database (data/id))), :engine :h2}] + Table [table {:db_id (u/get-id database), :name "VENUES"}]] + (sync/sync-table! table) + (map narrow-to-min-max + [(db/select-one-field :fingerprint Field, :id (data/id :venues :longitude)) + (db/select-one-field :fingerprint Field, :id (data/id :venues :latitude))]))) + + +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; | tests related to sync's Field hashes | +;;; +----------------------------------------------------------------------------------------------------------------+ + +(defn- exec! [& statements] + (doseq [statement statements] + (jdbc/execute! one-off-dbs/*conn* [statement]))) + +(defmacro ^:private throw-if-called {:style/indent 1} [fn-var & body] + `(with-redefs [~fn-var (fn [& args#] + (throw (RuntimeException. 
"Should not be called!")))] + ~@body)) + +;; Validate the changing of a column's type triggers a hash miss and sync +(expect + [ ;; Original column type + "SMALLINT" + ;; Altered the column, now it's an integer + "INTEGER" + ;; Original hash and the new one are not equal + false + ;; Reruning sync shouldn't change the hash + true] + (one-off-dbs/with-blueberries-db + (one-off-dbs/insert-rows-and-sync! (range 50)) + ;; After this sync, we know about the new table and it's SMALLINT column + (let [table-id (data/id :blueberries_consumed) + get-table #(Table (data/id :blueberries_consumed)) + get-field #(Field (data/id :blueberries_consumed :num)) + {old-hash :fields_hash} (get-table) + {old-db-type :database_type} (get-field)] + ;; Change the column from SMALLINT to INTEGER. In clojure-land these are both integers, but this change + ;; should trigger a hash miss and thus resync the table, since something has changed + (exec! "ALTER TABLE blueberries_consumed ALTER COLUMN num INTEGER") + (sync/sync-database! (data/db)) + (let [{new-hash :fields_hash} (get-table) + {new-db-type :database_type} (get-field)] + + ;; Syncing again with no change should not call sync-field-instances! or update the hash + (throw-if-called metabase.sync.sync-metadata.fields/sync-field-instances! + (sync/sync-database! 
(data/db)) + [old-db-type + new-db-type + (= old-hash new-hash) + (= new-hash (:fields_hash (get-table)))]))))) + +(defn- table-md-with-hash [table-id] + {:active-fields (count (db/select Field :table_id table-id :active true)) + :inactive-fields (count (db/select Field :table_id table-id :active false)) + :fields-hash (:fields_hash (db/select-one Table :id table-id))}) + +(defn- no-fields-hash [m] + (dissoc m :fields-hash)) + +;; This tests a table that adds a column, ensures sync picked up the new column and the hash changed +(expect + [ + ;; Only the num column should be found + {:active-fields 1, :inactive-fields 0} + ;; Add a column, should still be no inactive + {:active-fields 2, :inactive-fields 0} + ;; Adding a column should make the hashes not equal + false + ] + (one-off-dbs/with-blueberries-db + (one-off-dbs/insert-rows-and-sync! (range 50)) + ;; We should now have a hash value for num as a SMALLINT + (let [before-table-md (table-md-with-hash (data/id :blueberries_consumed)) + _ (exec! "ALTER TABLE blueberries_consumed ADD COLUMN weight FLOAT") + _ (sync/sync-database! (data/db)) + ;; Now that hash will include num and weight + after-table-md (table-md-with-hash (data/id :blueberries_consumed))] + [(no-fields-hash before-table-md) + (no-fields-hash after-table-md) + (= (:fields-hash before-table-md) + (:fields-hash after-table-md))]))) + +;; Drops a column, ensures sync finds the drop, updates the hash +(expect + [ + ;; Test starts with two columns + {:active-fields 2, :inactive-fields 0} + ;; Dropped the weight column + {:active-fields 1, :inactive-fields 1} + ;; Hashes should be different without the weight column + false] + (one-off-dbs/with-blank-db + ;; create a DB that has 2 columns this time instead of 1 + (exec! "CREATE TABLE blueberries_consumed (num SMALLINT NOT NULL, weight FLOAT)") + (one-off-dbs/insert-rows-and-sync! 
(range 50)) + ;; We should now have a hash value for num as a SMALLINT + (let [before-table-md (table-md-with-hash (data/id :blueberries_consumed)) + _ (exec! "ALTER TABLE blueberries_consumed DROP COLUMN weight") + _ (sync/sync-database! (data/db)) + ;; Now that hash will include num and weight + after-table-md (table-md-with-hash (data/id :blueberries_consumed))] + [(no-fields-hash before-table-md) + (no-fields-hash after-table-md) + (= (:fields-hash before-table-md) + (:fields-hash after-table-md))]))) + +;; Drops and readds a column, ensures that the hash is back to it's original value +(expect + [ + ;; Both num and weight columns should be found + {:active-fields 2, :inactive-fields 0} + ;; Both columns should still be present + {:active-fields 2, :inactive-fields 0} + ;; The hashes should be the same + true] + (one-off-dbs/with-blank-db + ;; create a DB that has 2 columns this time instead of 1 + (exec! "CREATE TABLE blueberries_consumed (num SMALLINT NOT NULL, weight FLOAT)") + (one-off-dbs/insert-rows-and-sync! (range 50)) + ;; We should now have a hash value for num as a SMALLINT + (let [before-table-md (table-md-with-hash (data/id :blueberries_consumed)) + _ (exec! "ALTER TABLE blueberries_consumed DROP COLUMN weight") + _ (sync/sync-database! (data/db)) + _ (exec! "ALTER TABLE blueberries_consumed ADD COLUMN weight FLOAT") + _ (sync/sync-database! 
(data/db)) + ;; Now that hash will include num and weight + after-table-md (table-md-with-hash (data/id :blueberries_consumed))] + [(no-fields-hash before-table-md) + (no-fields-hash after-table-md) + (= (:fields-hash before-table-md) + (:fields-hash after-table-md))])) ) diff --git a/test/metabase/sync/util_test.clj b/test/metabase/sync/util_test.clj new file mode 100644 index 0000000000000000000000000000000000000000..2b0cbbc5f25332366d74cd4f783752afb7537f44 --- /dev/null +++ b/test/metabase/sync/util_test.clj @@ -0,0 +1,55 @@ +(ns metabase.sync.util-test + "Tests for the utility functions shared by all parts of sync, such as the duplicate ops guard." + (:require [expectations :refer :all] + [metabase + [driver :as driver] + [sync :as sync]] + [metabase.models.database :refer [Database]] + [toucan.util.test :as tt])) + +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; | Duplicate Sync Prevention | +;;; +----------------------------------------------------------------------------------------------------------------+ + +;; test that we prevent running simultaneous syncs on the same database + +(defonce ^:private calls-to-describe-database (atom 0)) + +(defrecord ConcurrentSyncTestDriver [] + clojure.lang.Named + (getName [_] "ConcurrentSyncTestDriver")) + +(extend ConcurrentSyncTestDriver + driver/IDriver + (merge driver/IDriverDefaultsMixin + {:describe-database (fn [_ _] + (swap! calls-to-describe-database inc) + (Thread/sleep 1000) + {:tables #{}}) + :describe-table (constantly nil) + :details-fields (constantly [])})) + +(driver/register-driver! :concurrent-sync-test (ConcurrentSyncTestDriver.)) + +;; only one sync should be going on at a time +(expect + ;; describe-database gets called twice during a single sync process, once for syncing tables and a second time for + ;; syncing the _metabase_metadata table + 2 + (tt/with-temp* [Database [db {:engine :concurrent-sync-test}]] + (reset! 
calls-to-describe-database 0) + ;; start a sync processes in the background. It should take 1000 ms to finish + (let [f1 (future (sync/sync-database! db)) + f2 (do + ;; wait 200 ms to make sure everything is going + (Thread/sleep 200) + ;; Start another in the background. Nothing should happen here because the first is already running + (future (sync/sync-database! db)))] + ;; Start another in the foreground. Again, nothing should happen here because the original should still be + ;; running + (sync/sync-database! db) + ;; make sure both of the futures have finished + (deref f1) + (deref f2) + ;; Check the number of syncs that took place. Should be 2 (just the first) + @calls-to-describe-database))) diff --git a/test/metabase/sync_database_test.clj b/test/metabase/sync_database_test.clj index b369ff0ddf382b12a10755bf8de1ae41a20017ea..3fca12bcc7af2b50956147819db3841fb006e887 100644 --- a/test/metabase/sync_database_test.clj +++ b/test/metabase/sync_database_test.clj @@ -1,27 +1,29 @@ -(ns metabase.sync-database-test +(ns ^:deprecated metabase.sync-database-test "Tests for sync behavior that use a imaginary `SyncTestDriver`. These are kept around mainly because they've already - been written. For newer sync tests see `metabase.sync.*` test namespaces." - (:require [clojure.java.jdbc :as jdbc] - [clojure.string :as str] - [expectations :refer :all] + been written. For newer sync tests see `metabase.sync.*` test namespaces. + + Your new tests almost certainly do not belong in this namespace. Please put them in ones mirroring the location of + the specific part of sync you're testing." 
+ (:require [expectations :refer :all] [metabase - [db :as mdb] [driver :as driver] - [sync :refer :all] + [sync :as sync] [util :as u]] - [metabase.driver.generic-sql :as sql] [metabase.models [database :refer [Database]] [field :refer [Field]] - [field-values :as field-values :refer [FieldValues]] [table :refer [Table]]] - [metabase.test - [data :as data] - [util :as tu]] [metabase.test.mock.util :as mock-util] + [metabase.test.util :as tu] [toucan.db :as db] [toucan.util.test :as tt])) +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; | End-to-end 'MovieDB' Sync Tests | +;;; +----------------------------------------------------------------------------------------------------------------+ + +;; These tests make up a fake driver and then confirm that sync uses the various methods defined by the driver to +;; correctly sync appropriate metadata rows (Table/Field/etc.) in the Application DB (def ^:private ^:const sync-test-tables {"movie" {:name "movie" @@ -169,10 +171,10 @@ :base_type :type/Text :special_type :type/PK})]})] (tt/with-temp Database [db {:engine :sync-test}] - (sync-database! db) + (sync/sync-database! db) ;; we are purposely running the sync twice to test for possible logic issues which only manifest on resync of a ;; database, such as adding tables that already exist or duplicating fields - (sync-database! db) + (sync/sync-database! db) (mapv table-details (db/select Table, :db_id (u/get-id db), {:order-by [:name]})))) @@ -203,356 +205,13 @@ Table [table {:name "movie" :schema "default" :db_id (u/get-id db)}]] - (sync-table! table) + (sync/sync-table! 
table) (table-details (Table (:id table))))) -;; test that we prevent running simultaneous syncs on the same database - -(defonce ^:private calls-to-describe-database (atom 0)) - -(defrecord ConcurrentSyncTestDriver [] - clojure.lang.Named - (getName [_] "ConcurrentSyncTestDriver")) - -(extend ConcurrentSyncTestDriver - driver/IDriver - (merge driver/IDriverDefaultsMixin - {:describe-database (fn [_ _] - (swap! calls-to-describe-database inc) - (Thread/sleep 1000) - {:tables #{}}) - :describe-table (constantly nil) - :details-fields (constantly [])})) - -(driver/register-driver! :concurrent-sync-test (ConcurrentSyncTestDriver.)) - -;; only one sync should be going on at a time -(expect - ;; describe-database gets called twice during a single sync process, once for syncing tables and a second time for - ;; syncing the _metabase_metadata table - 2 - (tt/with-temp* [Database [db {:engine :concurrent-sync-test}]] - (reset! calls-to-describe-database 0) - ;; start a sync processes in the background. It should take 1000 ms to finish - (let [f1 (future (sync-database! db)) - f2 (do - ;; wait 200 ms to make sure everything is going - (Thread/sleep 200) - ;; Start another in the background. Nothing should happen here because the first is already running - (future (sync-database! db)))] - ;; Start another in the foreground. Again, nothing should happen here because the original should still be - ;; running - (sync-database! db) - ;; make sure both of the futures have finished - (deref f1) - (deref f2) - ;; Check the number of syncs that took place. Should be 2 (just the first) - @calls-to-describe-database))) - - -;; Test that we will remove field-values when they aren't appropriate. Calling `sync-database!` below should cause -;; them to get removed since the Field isn't `has_field_values` = `list` -(expect - [[1 2 3] - nil] - (tt/with-temp* [Database [db {:engine :sync-test}]] - (sync-database! 
db) - (let [table-id (db/select-one-id Table, :schema "default", :name "movie") - field-id (db/select-one-id Field, :table_id table-id, :name "studio")] - (tt/with-temp FieldValues [_ {:field_id field-id - :values "[1,2,3]"}] - (let [initial-field-values (db/select-one-field :values FieldValues, :field_id field-id)] - (sync-database! db) - [initial-field-values - (db/select-one-field :values FieldValues, :field_id field-id)]))))) - - -;; ## Individual Helper Fns - -(defn- force-sync-table! - "Updates the `:fields_hash` to ensure that the sync process will include fields in the sync" - [table] - (db/update! Table (u/get-id table), :fields_hash "something new") - (sync-table! (Table (data/id :venues)))) - -;; ## TEST PK SYNCING -(expect [:type/PK - nil - :type/PK - :type/Latitude - :type/PK] - (let [get-special-type (fn [] (db/select-one-field :special_type Field, :id (data/id :venues :id)))] - [;; Special type should be :id to begin with - (get-special-type) - ;; Clear out the special type - (do (db/update! Field (data/id :venues :id), :special_type nil) - (get-special-type)) - ;; Calling sync-table! should set the special type again - (do (force-sync-table! (data/id :venues)) - (get-special-type)) - ;; sync-table! should *not* change the special type of fields that are marked with a different type - (do (db/update! Field (data/id :venues :id), :special_type :type/Latitude) - (get-special-type)) - ;; Make sure that sync-table runs set-table-pks-if-needed! - (do (db/update! Field (data/id :venues :id), :special_type nil) - (force-sync-table! 
(Table (data/id :venues))) - (get-special-type))])) - -;; ## FK SYNCING - -;; Check that Foreign Key relationships were created on sync as we expect - -(expect (data/id :venues :id) - (db/select-one-field :fk_target_field_id Field, :id (data/id :checkins :venue_id))) - -(expect (data/id :users :id) - (db/select-one-field :fk_target_field_id Field, :id (data/id :checkins :user_id))) - -(expect (data/id :categories :id) - (db/select-one-field :fk_target_field_id Field, :id (data/id :venues :category_id))) - -;; Check that sync-table! causes FKs to be set like we'd expect -(expect [{:special_type :type/FK, :fk_target_field_id true} - {:special_type nil, :fk_target_field_id false} - {:special_type :type/FK, :fk_target_field_id true}] - (let [field-id (data/id :checkins :user_id) - get-special-type-and-fk-exists? (fn [] - (into {} (-> (db/select-one [Field :special_type :fk_target_field_id], - :id field-id) - (update :fk_target_field_id #(db/exists? Field :id %)))))] - [ ;; FK should exist to start with - (get-special-type-and-fk-exists?) - ;; Clear out FK / special_type - (do (db/update! Field field-id, :special_type nil, :fk_target_field_id nil) - (get-special-type-and-fk-exists?)) - ;; Run sync-table and they should be set again - (let [table (Table (data/id :checkins))] - (sync-table! table) - (get-special-type-and-fk-exists?))])) - - -;;; ## FieldValues Syncing - -(let [get-field-values (fn [] (db/select-one-field :values FieldValues, :field_id (data/id :venues :price))) - get-field-values-id (fn [] (db/select-one-id FieldValues, :field_id (data/id :venues :price)))] - ;; Test that when we delete FieldValues syncing the Table again will cause them to be re-created - (expect - [[1 2 3 4] ; 1 - nil ; 2 - [1 2 3 4]] ; 3 - [ ;; 1. Check that we have expected field values to start with - (get-field-values) - ;; 2. Delete the Field values, make sure they're gone - (do (db/delete! FieldValues :id (get-field-values-id)) - (get-field-values)) - ;; 3. 
Now re-sync the table and make sure they're back - (do (sync-table! (Table (data/id :venues))) - (get-field-values))]) - - ;; Test that syncing will cause FieldValues to be updated - (expect - [[1 2 3 4] ; 1 - [1 2 3] ; 2 - [1 2 3 4]] ; 3 - [ ;; 1. Check that we have expected field values to start with - (get-field-values) - ;; 2. Update the FieldValues, remove one of the values that should be there - (do (db/update! FieldValues (get-field-values-id), :values [1 2 3]) - (get-field-values)) - ;; 3. Now re-sync the table and make sure the value is back - (do (sync-table! (Table (data/id :venues))) - (get-field-values))])) - -;; Make sure that if a Field's cardinality passes `list-cardinality-threshold` (currently 100) the corresponding -;; FieldValues entry will be deleted (#3215) -(defn- insert-range-sql - "Generate SQL to insert a row for each number in `rang`." - [rang] - (str "INSERT INTO blueberries_consumed (num) VALUES " - (str/join ", " (for [n rang] - (str "(" n ")"))))) - -(defn- exec! [conn statements] - (doseq [statement statements] - (jdbc/execute! conn [statement]))) - -(defmacro ^:private with-new-mem-db - "Setup a in-memory H2 database with a `Database` instance bound to `db-sym` and a connection to that H3 database - bound to `conn-sym`." - [db-sym conn-sym & body] - `(let [details# {:db (str "mem:" (tu/random-name) ";DB_CLOSE_DELAY=10")}] - (binding [mdb/*allow-potentailly-unsafe-connections* true] - (tt/with-temp Database [db# {:engine :h2, :details details#}] - (jdbc/with-db-connection [conn# (sql/connection-details->spec (driver/engine->driver :h2) details#)] - (let [~db-sym db# - ~conn-sym conn#] - ~@body)))))) - -(expect - false - (with-new-mem-db db conn - ;; create the `blueberries_consumed` table and insert 50 values - (exec! conn ["CREATE TABLE blueberries_consumed (num INTEGER NOT NULL);" - (insert-range-sql (range 50))]) - (sync-database! 
db) - (let [table-id (db/select-one-id Table :db_id (u/get-id db)) - field-id (db/select-one-id Field :table_id table-id)] - ;; field values should exist... - (assert (= (count (db/select-one-field :values FieldValues :field_id field-id)) - 50)) - ;; ok, now insert enough rows to push the field past the `list-cardinality-threshold` and sync again, - ;; there should be no more field values - (exec! conn [(insert-range-sql (range 50 (+ 100 field-values/list-cardinality-threshold)))]) - (sync-database! db) - (db/exists? FieldValues :field_id field-id)))) - -(defn- narrow-to-min-max [row] - (-> row - (get-in [:type :type/Number]) - (select-keys [:min :max]) - (update :min #(u/round-to-decimals 4 %)) - (update :max #(u/round-to-decimals 4 %)))) - -(expect - [{:min -165.374 :max -73.9533} - {:min 10.0646 :max 40.7794}] - (tt/with-temp* [Database [database {:details (:details (Database (data/id))), :engine :h2}] - Table [table {:db_id (u/get-id database), :name "VENUES"}]] - (sync-table! table) - (map narrow-to-min-max - [(db/select-one-field :fingerprint Field, :id (data/id :venues :longitude)) - (db/select-one-field :fingerprint Field, :id (data/id :venues :latitude))]))) - -(defmacro ^{:style/indent 2} throw-if-called [fn-var & body] - `(with-redefs [~fn-var (fn [& args#] - (throw (RuntimeException. "Should not be called!")))] - ~@body)) - -;; Validate the changing of a column's type triggers a hash miss and sync -(expect - [ ;; Original column type - "SMALLINT" - ;; Altered the column, now it's an integer - "INTEGER" - ;; Original hash and the new one are not equal - false - ;; Reruning sync shouldn't change the hash - true] - (with-new-mem-db db conn - (let [get-table #(db/select-one Table :db_id (u/get-id db))] - ;; create the `blueberries_consumed` table and insert 50 values - (exec! conn ["CREATE TABLE blueberries_consumed (num SMALLINT NOT NULL);" - (insert-range-sql (range 50))]) - (sync-database! 
db) - ;; After this sync, we know about the new table and it's SMALLINT column - (let [table-id (u/get-id (get-table)) - get-field #(db/select-one Field :table_id table-id) - {old-hash :fields_hash} (get-table) - {old-db-type :database_type} (get-field)] - ;; Change the column from SMALLINT to INTEGER. In clojure-land these are both integers, but this change - ;; should trigger a hash miss and thus resync the table, since something has changed - (exec! conn ["ALTER TABLE blueberries_consumed ALTER COLUMN num INTEGER"]) - (sync-database! db) - (let [{new-hash :fields_hash} (get-table) - {new-db-type :database_type} (get-field)] - - ;; Syncing again with no change should not call sync-field-instances! or update the hash - (throw-if-called metabase.sync.sync-metadata.fields/sync-field-instances! - (sync-database! db) - [old-db-type - new-db-type - (= old-hash new-hash) - (= new-hash (:fields_hash (get-table)))])))))) - -(defn- table-md-with-hash [table-id] - {:active-fields (count (db/select Field :table_id table-id :active true)) - :inactive-fields (count (db/select Field :table_id table-id :active false)) - :fields-hash (:fields_hash (db/select-one Table :id table-id))}) - -(defn- no-fields-hash [m] - (dissoc m :fields-hash)) - -;; This tests a table that adds a column, ensures sync picked up the new column and the hash changed -(expect - [ - ;; Only the num column should be found - {:active-fields 1, :inactive-fields 0} - ;; Add a column, should still be no inactive - {:active-fields 2, :inactive-fields 0} - ;; Adding a column should make the hashes not equal - false - ] - (with-new-mem-db db conn - (let [get-table #(db/select-one Table :db_id (u/get-id db))] - ;; create the `blueberries_consumed` table and insert 50 values - (exec! conn ["CREATE TABLE blueberries_consumed (num SMALLINT NOT NULL)" - (insert-range-sql (range 50))]) - (sync-database! 
db) - ;; We should now have a hash value for num as a SMALLINT - (let [table-id (u/get-id (get-table)) - before-table-md (table-md-with-hash table-id) - _ (exec! conn ["ALTER TABLE blueberries_consumed ADD COLUMN weight FLOAT"]) - _ (sync-database! db) - ;; Now that hash will include num and weight - after-table-md (table-md-with-hash table-id)] - [(no-fields-hash before-table-md) - (no-fields-hash after-table-md) - (= (:fields-hash before-table-md) - (:fields-hash after-table-md))])))) - -;; Drops a column, ensures sync finds the drop, updates the hash -(expect - [ - ;; Test starts with two columns - {:active-fields 2, :inactive-fields 0} - ;; Dropped the weight column - {:active-fields 1, :inactive-fields 1} - ;; Hashes should be different without the weight column - false] - (with-new-mem-db db conn - (let [get-table #(db/select-one Table :db_id (u/get-id db))] - ;; create the `blueberries_consumed` table and insert 50 values - (exec! conn ["CREATE TABLE blueberries_consumed (num SMALLINT NOT NULL, weight FLOAT)" - (insert-range-sql (range 50))]) - (sync-database! db) - ;; We should now have a hash value for num as a SMALLINT - (let [table-id (u/get-id (get-table)) - before-table-md (table-md-with-hash table-id) - _ (exec! conn ["ALTER TABLE blueberries_consumed DROP COLUMN weight"]) - _ (sync-database! 
db) - ;; Now that hash will include num and weight - after-table-md (table-md-with-hash table-id)] - [(no-fields-hash before-table-md) - (no-fields-hash after-table-md) - (= (:fields-hash before-table-md) - (:fields-hash after-table-md))])))) - -;; Drops and readds a column, ensures that the hash is back to it's original value -(expect - [ - ;; Both num and weight columns should be found - {:active-fields 2, :inactive-fields 0} - ;; Both columns should still be present - {:active-fields 2, :inactive-fields 0} - ;; The hashes should be the same - true] - (with-new-mem-db db conn - (let [get-table #(db/select-one Table :db_id (u/get-id db))] - ;; create the `blueberries_consumed` table and insert 50 values - (exec! conn ["CREATE TABLE blueberries_consumed (num SMALLINT NOT NULL, weight FLOAT)" - (insert-range-sql (range 50))]) - (sync-database! db) - ;; We should now have a hash value for num as a SMALLINT - (let [table-id (u/get-id (get-table)) - before-table-md (table-md-with-hash table-id) - _ (exec! conn ["ALTER TABLE blueberries_consumed DROP COLUMN weight"]) - _ (sync-database! db) - _ (exec! conn ["ALTER TABLE blueberries_consumed ADD COLUMN weight FLOAT"]) - _ (sync-database! db) - ;; Now that hash will include num and weight - after-table-md (table-md-with-hash table-id)] - [(no-fields-hash before-table-md) - (no-fields-hash after-table-md) - (= (:fields-hash before-table-md) - (:fields-hash after-table-md))]))) ) +;; !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +;; !! !! +;; !! HEY! Your tests probably don't belong in this namespace! Put them in one appropriate to the specific part of !! +;; !! sync they are testing. !! +;; !! !! +;; !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
diff --git a/test/metabase/test/data/one_off_dbs.clj b/test/metabase/test/data/one_off_dbs.clj new file mode 100644 index 0000000000000000000000000000000000000000..b551a819ccacf3ed95c69816d468532df6f9e5c5 --- /dev/null +++ b/test/metabase/test/data/one_off_dbs.clj @@ -0,0 +1,76 @@ +(ns metabase.test.data.one-off-dbs + "Test utility functions for using one-off temporary in-memory H2 databases, including completely blank ones and the + infamous `blueberries_consumed` database, used by sync tests in several different namespaces." + (:require [clojure.java.jdbc :as jdbc] + [clojure.string :as str] + [metabase + [db :as mdb] + [driver :as driver] + [sync :as sync]] + [metabase.driver.generic-sql :as sql] + [metabase.models.database :refer [Database]] + [metabase.test + [data :as data] + [util :as tu]] + [toucan.util.test :as tt])) + +(def ^:dynamic *conn* + "Bound to a JDBC connection spec when using one of the `with-db` macros below." + nil) + +;;; ---------------------------------------- Generic Empty Temp In-Memory DB ----------------------------------------- + +(defn do-with-blank-db + "Impl for `with-blank-db` macro; prefer that to using this directly." + [f] + (let [details {:db (str "mem:" (tu/random-name) ";DB_CLOSE_DELAY=10")}] + (binding [mdb/*allow-potentailly-unsafe-connections* true] + (tt/with-temp Database [db {:engine :h2, :details details}] + (data/with-db db + (jdbc/with-db-connection [conn (sql/connection-details->spec (driver/engine->driver :h2) details)] + (binding [*conn* conn] + (f)))))))) + +(defmacro with-blank-db + "An empty canvas upon which you may paint your dreams. + + Creates a one-off tempory in-memory H2 database and binds this DB with `data/with-db` so you can use `data/db` and + `data/id` to access it. `*conn*` is bound to a JDBC connection spec so you can execute DDL statements to populate it + as needed." 
+ {:style/indent 0} + [& body] + `(do-with-blank-db (fn [] ~@body))) + + +;;; ------------------------------------------------- Blueberries DB ------------------------------------------------- + +(defn do-with-blueberries-db + "Impl for `with-blueberries-db` macro; use that instead of using this directly." + [f] + (with-blank-db + (jdbc/execute! *conn* ["CREATE TABLE blueberries_consumed (num SMALLINT NOT NULL);"]) + (f))) + +(defmacro with-blueberries-db + "Creates a database with a single table, `blueberries_consumed`, with one column, `num`." + {:style/indent 0} + [& body] + `(do-with-blueberries-db (fn [] ~@body))) + + +;;; ------------------------------------ Helper Fns for Populating Blueberries DB ------------------------------------ + +(defn- insert-range-sql + "Generate SQL to insert a row for each number in `rang`." + [rang] + (str "INSERT INTO blueberries_consumed (num) VALUES " + (str/join ", " (for [n rang] + (str "(" n ")"))))) + +(defn insert-rows-and-sync! + "With the temp blueberries db from above, insert a `range` of values and re-sync the DB. + + (insert-rows-and-sync! [0 1 2 3]) ; insert 4 rows" + [rang] + (jdbc/execute! *conn* [(insert-range-sql rang)]) + (sync/sync-database! (data/db)))