Skip to content
Snippets Groups Projects
Commit 6a1e5b66 authored by Cam Saul's avatar Cam Saul Committed by GitHub
Browse files

Merge pull request #5794 from metabase/remove-table-rows-sample-driver-method

Remove the table-rows-sample driver method
parents d28789b6 2381222f
No related branches found
No related tags found
No related merge requests found
Showing
with 185 additions and 264 deletions
......@@ -5,7 +5,7 @@
:description "Metabase Community Edition"
:url "http://metabase.com/"
:min-lein-version "2.5.0"
:aliases {"bikeshed" ["bikeshed" "--max-line-length" "240"]
:aliases {"bikeshed" ["bikeshed" "--max-line-length" "220"]
"check-reflection-warnings" ["with-profile" "+reflection-warnings" "check"]
"test" ["with-profile" "+expectations" "expectations"]
"generate-sample-dataset" ["with-profile" "+generate-sample-dataset" "run"]
......
......@@ -34,7 +34,7 @@
"Fetch *all* `Segments`."
[]
(filter mi/can-read? (-> (db/select Segment, :is_active true, {:order-by [[:%lower.name :asc]]})
(hydrate :creator))))
(hydrate :creator))))
(api/defendpoint PUT "/:id"
......
......@@ -3,10 +3,10 @@
[medley.core :as m]
[metabase.models
[database :refer [Database]]
field
[setting :refer [defsetting]]
table]
[setting :refer [defsetting]]]
[metabase.sync.interface :as si]
[metabase.util :as u]
[schema.core :as s]
[toucan.db :as db])
(:import clojure.lang.Keyword
metabase.models.database.DatabaseInstance
......@@ -15,13 +15,7 @@
;;; ## INTERFACE + CONSTANTS
;; Upper bound used by driver `table-rows-sample` implementations when issuing their
;; LIMITed sampling queries.
(def ^:const max-sample-rows
"The maximum number of values we should return when using `table-rows-sample`.
This many is probably fine for inferring special types and what-not; we don't want
to scan millions of values at any rate."
10000)
(def ^:const connection-error-messages
(def connection-error-messages
"Generic error messages that drivers should return in their implementation of `humanize-connection-error-message`."
{:cannot-connect-check-host-and-port "Hmm, we couldn't connect to the database. Make sure your host and port settings are correct"
:ssh-tunnel-auth-fail "We couldn't connect to the ssh tunnel host. Check the username, password"
......@@ -124,13 +118,16 @@
* `:foreign-keys` - Does this database support foreign key relationships?
* `:nested-fields` - Does this database support nested fields (e.g. Mongo)?
* `:set-timezone` - Does this driver support setting a timezone for the query?
* `:basic-aggregations` - Does the driver support *basic* aggregations like `:count` and `:sum`? (Currently, everything besides standard deviation is considered \"basic\"; only GA doesn't support this).
* `:standard-deviation-aggregations` - Does this driver support [standard deviation aggregations](https://github.com/metabase/metabase/wiki/Query-Language-'98#stddev-aggregation)?
* `:expressions` - Does this driver support [expressions](https://github.com/metabase/metabase/wiki/Query-Language-'98#expressions) (e.g. adding the values of 2 columns together)?
* `:basic-aggregations` - Does the driver support *basic* aggregations like `:count` and `:sum`? (Currently,
everything besides standard deviation is considered \"basic\"; only GA doesn't support this).
* `:standard-deviation-aggregations` - Does this driver support standard deviation aggregations?
* `:expressions` - Does this driver support expressions (e.g. adding the values of 2 columns together)?
* `:dynamic-schema` - Does this Database have no fixed definitions of schemas? (e.g. Mongo)
* `:native-parameters` - Does the driver support parameter substitution on native queries?
* `:expression-aggregations` - Does the driver support using expressions inside aggregations? e.g. something like \"sum(x) + count(y)\" or \"avg(x + y)\"
* `:nested-queries` - Does the driver support using a query as the `:source-query` of another MBQL query? Examples are CTEs or subselects in SQL queries.")
* `:expression-aggregations` - Does the driver support using expressions inside aggregations? e.g. something like
\"sum(x) + count(y)\" or \"avg(x + y)\"
* `:nested-queries` - Does the driver support using a query as the `:source-query` of another MBQL query? Examples
are CTEs or subselects in SQL queries.")
(format-custom-field-name ^String [this, ^String custom-field-name]
"*OPTIONAL*. Return the custom name passed via an MBQL `:named` clause so it matches the way it is returned in the
......@@ -189,31 +186,8 @@
"*OPTIONAL*. Return a sequence of *all* the rows in a given TABLE, which is guaranteed to have at least `:name`
and `:schema` keys. (It is guaranteed to satisfy the `DatabaseMetadataTable` schema in
`metabase.sync.interface`.) Currently, this is only used for iterating over the values in a `_metabase_metadata`
table. As such, the results are not expected to be returned lazily.
There is no expectation that the results be returned in any given order.")
;; TODO - Not 100% sure we need this method since it seems like we could just use an MBQL query to fetch this info.
(table-rows-sample ^clojure.lang.Sequential [this, ^TableInstance table, fields]
"*OPTIONAL*. Return a sample of rows in TABLE with the specified FIELDS. This is used to implement some methods of the
database sync process which require rows of data during execution. At this time, this should just return a basic
sequence of rows in the fastest way possible, with no special sorting or any sort of randomization done to ensure
a good sample. (Improved sampling is something we plan to add in the future.)
The sample should return up to `max-sample-rows` rows, which is currently `10000`."))
;; Default `table-rows-sample` implementation: builds a bare-bones MBQL query
;; ({:source-table ..., :fields ..., :limit max-sample-rows}) and runs it through the QP.
(defn- table-rows-sample-via-qp
"Default implementation of `table-rows-sample` that just runs a basic MBQL query to fetch values for a Table.
Prefer this to writing your own implementation of `table-rows-sample`; those are around for purely historical
reasons and may be removed in the future."
;; first arg is the driver, which this implementation never needs -- the QP works
;; everything out from the Table's :db_id
[_ table fields]
;; `process-query` is resolved at runtime rather than required at the top of the
;; namespace -- presumably to avoid a circular dependency; TODO confirm
(let [results ((resolve 'metabase.query-processor/process-query)
{:database (:db_id table)
:type :query
:query {:source-table (u/get-id table)
:fields (vec (for [field fields]
[:field-id (u/get-id field)]))
:limit max-sample-rows}})]
;; discard the QP result envelope; callers only want the raw rows
(get-in results [:data :rows])))
table. As such, the results are not expected to be returned lazily. There is no expectation that the results be
returned in any given order."))
(def IDriverDefaultsMixin
......@@ -226,8 +200,10 @@
:notify-database-updated (constantly nil)
:process-query-in-context (u/drop-first-arg identity)
:sync-in-context (fn [_ _ f] (f))
:table-rows-seq (constantly nil)
:table-rows-sample table-rows-sample-via-qp})
:table-rows-seq (fn [driver & _]
(throw
(NoSuchMethodException.
(str (name driver) " does not implement table-rows-seq."))))})
;;; ## CONFIG
......@@ -262,8 +238,8 @@
(log/warn (format "No -init-driver function found for '%s'" (name ns-symb)))))
(defn find-and-load-drivers!
"Search Classpath for namespaces that start with `metabase.driver.`, then `require` them and look for the `driver-init`
function which provides a uniform way for Driver initialization to be done."
"Search Classpath for namespaces that start with `metabase.driver.`, then `require` them and look for the
`driver-init` function which provides a uniform way for Driver initialization to be done."
[]
(doseq [ns-symb @u/metabase-namespace-symbols
:when (re-matches #"^metabase\.driver\.[a-z0-9_]+$" (name ns-symb))]
......@@ -381,3 +357,23 @@
(when rethrow-exceptions
(throw (Exception. (humanize-connection-error-message driver (.getMessage e)))))
false))))
;; Upper bound on the row count fetched by `table-rows-sample` below (used as the
;; MBQL :limit).
(def ^:const max-sample-rows
"The maximum number of values we should return when using `table-rows-sample`.
This many is probably fine for inferring special types and what-not; we don't want
to scan millions of values at any rate."
10000)
;; TODO - move this to the metadata-queries namespace or something like that instead
;; This single QP-based function replaces the removed per-driver `table-rows-sample`
;; method (see the commit title above): every driver now gets its sample via one MBQL query.
(s/defn ^:always-validate ^{:style/indent 1} table-rows-sample :- (s/maybe si/TableSample)
"Run a basic MBQL query to fetch a sample of rows belonging to a Table."
[table :- si/TableInstance, fields :- [si/FieldInstance]]
;; `process-query` is resolved at runtime rather than required up top -- presumably to
;; avoid a circular namespace dependency; TODO confirm
(let [results ((resolve 'metabase.query-processor/process-query)
{:database (:db_id table)
:type :query
:query {:source-table (u/get-id table)
:fields (vec (for [field fields]
[:field-id (u/get-id field)]))
:limit max-sample-rows}})]
;; strip the QP result envelope down to just the rows
(get-in results [:data :rows])))
......@@ -187,16 +187,6 @@
(post-process-native (execute-bigquery database query-string))))
;; BigQuery-specific row sampling: issues a native SELECT ... LIMIT max-sample-rows,
;; using BigQuery's bracketed [dataset.table.field] identifier syntax.
(defn- table-rows-sample [{table-name :name, :as table} fields]
(let [{{dataset-name :dataset-id} :details, :as db} (table/database table)]
;; process-native* returns a result map; we only care about its :rows
(:rows (process-native* db (format "SELECT %s FROM [%s.%s] LIMIT %d"
(str/join ", " (for [{field-name :name} fields]
(format "[%s.%s.%s]" dataset-name table-name field-name)))
dataset-name table-name driver/max-sample-rows)))))
;;; # Generic SQL Driver Methods
(defn- date-add [unit timestamp interval]
......@@ -503,7 +493,6 @@
(when-not config/is-test?
;; during unit tests don't treat bigquery as having FK support
#{:foreign-keys})))
:table-rows-sample (u/drop-first-arg table-rows-sample)
:format-custom-field-name (u/drop-first-arg format-custom-field-name)
:mbql->native (u/drop-first-arg mbql->native)}))
......
......@@ -248,17 +248,6 @@
(query driver database table {:select [:*]}))
;; Generic SQL row sampling: SELECT the named fields from the qualified, escaped table,
;; bounded by `driver/max-sample-rows` via the driver's own `apply-limit`.
(defn- table-rows-sample [driver table fields]
;; :as-arrays? makes the JDBC layer return rows as vectors, with the first "row"
;; being the column names -- presumably clojure.java.jdbc semantics; confirm
(->> (binding [*jdbc-options* {:as-arrays? true}]
(query driver (table/database table) (apply-limit driver
{:select (for [field fields]
(keyword (:name field)))
:from [(qualify+escape table)]}
{:limit driver/max-sample-rows})))
;; the first row coming back will be the columns list so go ahead and drop it like it's hot
(drop 1)))
(defn features
"Default implementation of `IDriver` `features` for SQL drivers."
[driver]
......@@ -395,5 +384,4 @@
:features features
:mbql->native (resolve 'metabase.driver.generic-sql.query-processor/mbql->native)
:notify-database-updated notify-database-updated
:table-rows-seq table-rows-seq
:table-rows-sample table-rows-sample}))
:table-rows-seq table-rows-seq}))
......@@ -12,7 +12,7 @@
[com.google.api.services.analytics.model Column Columns Profile Profiles Webproperties Webproperty]
[java.util Collections Date Map]))
;;; ------------------------------------------------------------ Client ------------------------------------------------------------
;;; ---------------------------------------- Client ----------------------------------------
(defn- ^Analytics credential->client [^GoogleCredential credential]
(.build (doto (Analytics$Builder. google/http-transport google/json-factory credential)
......@@ -25,7 +25,7 @@
(comp credential->client database->credential))
;;; ------------------------------------------------------------ describe-database ------------------------------------------------------------
;;; ---------------------------------------- describe-database ----------------------------------------
(defn- fetch-properties
^Webproperties [^Analytics client, ^String account-id]
......@@ -55,9 +55,8 @@
:schema nil}))})
;;; ------------------------------------------------------------ describe-table ------------------------------------------------------------
;;; ---------------------------------------- describe-table ----------------------------------------
;; This is the set of redundant date-related columns described below:
(def ^:private ^:const redundant-date-fields
"Set of column IDs covered by `unit->ga-dimension` in the GA QP.
We don't need to present them because people can just use date bucketing on the `ga:date` field."
......@@ -71,7 +70,8 @@
"ga:yearMonth"
"ga:month"
"ga:year"
;; leave these out as well because their display names are things like "Month" but they're not dates so they're not really useful
;; leave these out as well because their display names are things like "Month" but they're not dates so they're
;; not really useful
"ga:cohortNthDay"
"ga:cohortNthMonth"
"ga:cohortNthWeek"})
......@@ -113,44 +113,15 @@
:fields (describe-columns database)})
;;; ------------------------------------------------------------ _metabase_metadata ------------------------------------------------------------
(defn- property+profile->display-name
"Format a table name for a GA property and GA profile"
[^Webproperty property, ^Profile profile]
;; strip any leading http(s):// scheme from both names before comparing/combining them
(let [property-name (s/replace (.getName property) #"^https?://" "")
profile-name (s/replace (.getName profile) #"^https?://" "")]
;; don't include the profile if it's the same as property-name or is the default "All Web Site Data"
(if (or (.contains property-name profile-name)
(= profile-name "All Web Site Data"))
property-name
(str property-name " (" profile-name ")"))))
;; Builds the rows of the `_metabase_metadata` pseudo-table: a flat sequence of
;; {:keypath ..., :value ...} maps that set display_name for every profile and
;; display_name/description for every column of every profile.
(defn- table-rows-seq [database table]
;; this method is only supposed to be called for _metabase_metadata, make sure that's the case
{:pre [(= (:name table) "_metabase_metadata")]}
;; now build a giant sequence of all the things we want to set
(apply concat
;; set display_name for all the tables
(for [[^Webproperty property, ^Profile profile] (properties+profiles database)]
(cons {:keypath (str (.getId profile) ".display_name")
:value (property+profile->display-name property profile)}
;; set display_name and description for each column for this table
(apply concat (for [^Column column (columns database)]
[{:keypath (str (.getId profile) \. (.getId column) ".display_name")
:value (column-attribute column :uiName)}
{:keypath (str (.getId profile) \. (.getId column) ".description")
:value (column-attribute column :description)}]))))))
;;; ------------------------------------------------------------ can-connect? ------------------------------------------------------------
;;; ---------------------------------------- can-connect? ----------------------------------------
;; A GA connection is considered live iff we can fetch a truthy set of profile IDs
;; for the given details map.
(defn- can-connect? [details-map]
{:pre [(map? details-map)]}
(boolean (profile-ids {:details details-map})))
;;; ------------------------------------------------------------ execute-query ------------------------------------------------------------
;;; ---------------------------------------- execute-query ----------------------------------------
(defn- column-with-name ^Column [database-or-id column-name]
(some (fn [^Column column]
......@@ -170,7 +141,8 @@
(= data-type "STRING") :type/Text)]
{:base_type base-type})))))
;; memoize this because the display names and other info isn't going to change and fetching this info from GA can take around half a second
;; memoize this because the display names and other info isn't going to change and fetching this info from GA can take
;; around half a second
(def ^:private ^{:arglists '([database-id column-name])} memoized-column-metadata
(memoize column-metadata))
......@@ -213,7 +185,7 @@
(google/execute (mbql-query->request query)))
;;; ------------------------------------------------------------ Driver ------------------------------------------------------------
;;; ---------------------------------------- Driver ----------------------------------------
(defrecord GoogleAnalyticsDriver []
clojure.lang.Named
......@@ -243,8 +215,7 @@
:required true}])
:execute-query (u/drop-first-arg (partial qp/execute-query do-query))
:process-query-in-context (u/drop-first-arg process-query-in-context)
:mbql->native (u/drop-first-arg qp/mbql->native)
:table-rows-seq (u/drop-first-arg table-rows-seq)}))
:mbql->native (u/drop-first-arg qp/mbql->native)}))
(defn -init-driver
"Register the Google Analytics driver"
......
......@@ -147,20 +147,6 @@
:fields (set (for [field (keys parsed-rows)]
(describe-table-field field (field parsed-rows))))})))
;; Mongo-specific row sampling. Requires an already-open connection because the
;; monger query is issued against *mongo-connection*.
(s/defn ^:private ^:always-validate table-rows-sample [table :- si/TableInstance, fields :- [si/FieldInstance]]
(assert *mongo-connection*
"You must have an open Mongo connection in order to get lazy results with table-rows-sample.")
;; precompute each Field's dotted-path components; `rest` drops the first
;; qualified-name component -- presumably the collection name itself; confirm
(let [fields (for [field fields]
(let [name-components (rest (field/qualified-name-components field))]
(assert (seq name-components))
(assoc field :name-components name-components)))
results (mq/with-collection *mongo-connection* (:name table)
(mq/fields (for [field fields]
(str/join \. (:name-components field)))))]
;; shape the documents into rows: one seq of values per document, in `fields` order,
;; using get-in so nested fields resolve through their keyword path
(for [row results]
(for [field fields]
(get-in row (map keyword (:name-components field)))))))
(defrecord MongoDriver []
clojure.lang.Named
......@@ -203,7 +189,6 @@
:placeholder "readPreference=nearest&replicaSet=test"}]))
:execute-query (u/drop-first-arg qp/execute-query)
:features (constantly #{:basic-aggregations :dynamic-schema :nested-fields})
:table-rows-sample (u/drop-first-arg table-rows-sample)
:humanize-connection-error-message (u/drop-first-arg humanize-connection-error-message)
:mbql->native (u/drop-first-arg qp/mbql->native)
:process-query-in-context (u/drop-first-arg process-query-in-context)
......
......@@ -181,15 +181,6 @@
:columns (map (comp keyword :name) columns)
:rows rows}))
;; Presto-specific row sampling via a native SELECT with a LIMIT of max-sample-rows.
(defn- table-rows-sample [table fields]
;; TODO - look into making this actually lazy
(let [{:keys [details]} (table/database table)
sql (format "SELECT %s FROM %s LIMIT %d"
(str/join ", " (for [{field-name :name} fields]
(quote-name field-name)))
(quote+combine-names (:schema table) (:name table))
driver/max-sample-rows)]
;; execute-presto-query! returns a result map; we only need its :rows
(:rows (execute-presto-query! details sql))))
(defn- humanize-connection-error-message [message]
(condp re-matches message
......@@ -205,13 +196,6 @@
#".*" ; default
message))
;; Return every row of the given table as a map of column-keyword -> value
;; (SELECT * with no limit).
(defn- table-rows-seq [{:keys [details]} {:keys [schema name]}]
(let [sql (format "SELECT * FROM %s" (quote+combine-names schema name))
{:keys [rows], :as result} (execute-presto-query! details sql)
columns (map (comp keyword :name) (:columns result))]
;; zip each positional row up with the column names so callers get maps
(for [row rows]
(zipmap columns row))))
;;; ISQLDriver implementation
......@@ -310,9 +294,7 @@
(when-not config/is-test?
;; during unit tests don't treat presto as having FK support
#{:foreign-keys})))
:table-rows-sample (u/drop-first-arg table-rows-sample)
:humanize-connection-error-message (u/drop-first-arg humanize-connection-error-message)
:table-rows-seq (u/drop-first-arg table-rows-seq)})
:humanize-connection-error-message (u/drop-first-arg humanize-connection-error-message)})
sql/ISQLDriver
(merge (sql/ISQLDriverDefaultsMixin)
......
......@@ -13,15 +13,7 @@
"Procure a sequence of table rows, up to `max-sample-rows` (10,000 at the time of this writing), for
use in the fingerprinting sub-stage of analysis. Returns `nil` if no rows are available."
[table :- i/TableInstance, fields :- [i/FieldInstance]]
;; TODO - we should make `->driver` a method so we can pass things like Fields into it
(let [db-id (:db_id table)
driver (driver/->driver db-id)
database (Database db-id)]
(driver/sync-in-context driver database
(fn []
(->> (driver/table-rows-sample driver table fields)
(take driver/max-sample-rows)
seq)))))
(seq (driver/table-rows-sample table fields)))
(s/defn ^:private ^:always-validate table-sample->field-sample :- (s/maybe i/FieldSample)
"Fetch a sample for the Field whose values are at INDEX in the TABLE-SAMPLE.
......@@ -34,8 +26,7 @@
(s/defn ^:always-validate sample-fields :- [(s/pair i/FieldInstance "Field", (s/maybe i/FieldSample) "FieldSample")]
"Fetch samples for a series of FIELDS. Returns tuples of Field and sample.
This may return `nil` if the driver doesn't support `table-rows-sample` or the sample could not be fetched for some
other reason."
This may return `nil` if the sample could not be fetched for some other reason."
[table :- i/TableInstance, fields :- [i/FieldInstance]]
(when-let [table-sample (basic-sample table fields)]
(for [[i field] (m/indexed fields)]
......
......@@ -34,10 +34,9 @@
[3 "The Apple Pan"]
[4 "Wurstküche"]
[5 "Brite Spot Family Restaurant"]]
(->> (driver/table-rows-sample (BigQueryDriver.)
(Table (data/id :venues))
[(Field (data/id :venues :id))
(Field (data/id :venues :name))])
(->> (driver/table-rows-sample (Table (data/id :venues))
[(Field (data/id :venues :id))
(Field (data/id :venues :name))])
(sort-by first)
(take 5)))
......
......@@ -27,10 +27,9 @@
["100" "PizzaHacker" "2014-07-26T07:00:00.000Z"]
["1000" "Tito's Tacos" "2014-06-03T07:00:00.000Z"]
["101" "Golden Road Brewing" "2015-09-04T07:00:00.000Z"]]
(->> (driver/table-rows-sample (DruidDriver.)
(Table (data/id :checkins))
[(Field (data/id :checkins :id))
(Field (data/id :checkins :venue_name))])
(->> (driver/table-rows-sample (Table (data/id :checkins))
[(Field (data/id :checkins :id))
(Field (data/id :checkins :venue_name))])
(sort-by first)
(take 5)))
......
......@@ -72,7 +72,8 @@
["33 Taps"]
["800 Degrees Neapolitan Pizzeria"]
["BCD Tofu House"]]
(->> (#'sql/table-rows-sample datasets/*driver* (Table (data/id :venues)) [(Field (data/id :venues :name))])
(->> (driver/table-rows-sample (Table (data/id :venues))
[(Field (data/id :venues :name))])
;; since order is not guaranteed do some sorting here so we always get the same results
(sort-by first)
(take 5)))
......
......@@ -6,14 +6,12 @@
[driver :as driver]
[query-processor :as qp]
[query-processor-test :refer [rows]]]
[metabase.driver.mongo.query-processor :as mongo-qp]
[metabase.models
[field :refer [Field]]
[field-values :refer [FieldValues]]
[table :as table :refer [Table]]]
[metabase.query-processor.middleware.expand :as ql]
[metabase.test
[data :as data]
[util :as tu]]
[metabase.test.data :as data]
[metabase.test.data
[datasets :as datasets]
[interface :as i]]
......@@ -127,10 +125,9 @@
[5 "Brite Spot Family Restaurant"]]
(driver/sync-in-context (MongoDriver.) (data/db)
(fn []
(vec (take 5 (driver/table-rows-sample (MongoDriver.)
(Table (data/id :venues))
[(Field (data/id :venues :id))
(Field (data/id :venues :name))]))))))
(vec (take 5 (driver/table-rows-sample (Table (data/id :venues))
[(Field (data/id :venues :id))
(Field (data/id :venues :name))]))))))
;; ## Big-picture tests for the way data should look post-sync
......@@ -187,38 +184,38 @@
(ql/filter (ql/= $bird_id "abcdefabcdefabcdefabcdef"))))))
;;; ------------------------------------------------------------ Test that we can handle native queries with "ISODate(...)" and "ObjectId(...) forms (#3741, #4448) ------------------------------------------------------------
(tu/resolve-private-vars metabase.driver.mongo.query-processor
maybe-decode-fncall decode-fncalls encode-fncalls)
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | ISODate(...) AND ObjectId(...) HANDLING (#3741, #4448) |
;;; +----------------------------------------------------------------------------------------------------------------+
(expect
"[{\"$match\":{\"date\":{\"$gte\":[\"___ISODate\", \"2012-01-01\"]}}}]"
(encode-fncalls "[{\"$match\":{\"date\":{\"$gte\":ISODate(\"2012-01-01\")}}}]"))
(#'mongo-qp/encode-fncalls "[{\"$match\":{\"date\":{\"$gte\":ISODate(\"2012-01-01\")}}}]"))
(expect
"[{\"$match\":{\"entityId\":{\"$eq\":[\"___ObjectId\", \"583327789137b2700a1621fb\"]}}}]"
(encode-fncalls "[{\"$match\":{\"entityId\":{\"$eq\":ObjectId(\"583327789137b2700a1621fb\")}}}]"))
(#'mongo-qp/encode-fncalls "[{\"$match\":{\"entityId\":{\"$eq\":ObjectId(\"583327789137b2700a1621fb\")}}}]"))
;; make sure fn calls with no arguments work as well (#4996)
(expect
"[{\"$match\":{\"date\":{\"$eq\":[\"___ISODate\"]}}}]"
(encode-fncalls "[{\"$match\":{\"date\":{\"$eq\":ISODate()}}}]"))
(#'mongo-qp/encode-fncalls "[{\"$match\":{\"date\":{\"$eq\":ISODate()}}}]"))
(expect
(DateTime. "2012-01-01")
(maybe-decode-fncall ["___ISODate" "2012-01-01"]))
(#'mongo-qp/maybe-decode-fncall ["___ISODate" "2012-01-01"]))
(expect
(ObjectId. "583327789137b2700a1621fb")
(maybe-decode-fncall ["___ObjectId" "583327789137b2700a1621fb"]))
(#'mongo-qp/maybe-decode-fncall ["___ObjectId" "583327789137b2700a1621fb"]))
(expect
[{:$match {:date {:$gte (DateTime. "2012-01-01")}}}]
(decode-fncalls [{:$match {:date {:$gte ["___ISODate" "2012-01-01"]}}}]))
(#'mongo-qp/decode-fncalls [{:$match {:date {:$gte ["___ISODate" "2012-01-01"]}}}]))
(expect
[{:$match {:entityId {:$eq (ObjectId. "583327789137b2700a1621fb")}}}]
(decode-fncalls [{:$match {:entityId {:$eq ["___ObjectId" "583327789137b2700a1621fb"]}}}]))
(#'mongo-qp/decode-fncalls [{:$match {:entityId {:$eq ["___ObjectId" "583327789137b2700a1621fb"]}}}]))
(datasets/expect-with-engine :mongo
5
......
......@@ -105,24 +105,9 @@
["The Apple Pan"]
["Wurstküche"]
["Brite Spot Family Restaurant"]]
(take 5 (driver/table-rows-sample (PrestoDriver.)
(Table (data/id :venues))
[(Field (data/id :venues :name))])))
(take 5 (driver/table-rows-sample (Table (data/id :venues))
[(Field (data/id :venues :name))])))
;;; TABLE-ROWS-SEQ
(datasets/expect-with-engine :presto
[{:name "Red Medicine", :price 3, :category_id 4, :id 1}
{:name "Stout Burgers & Beers", :price 2, :category_id 11, :id 2}
{:name "The Apple Pan", :price 2, :category_id 11, :id 3}
{:name "Wurstküche", :price 2, :category_id 29, :id 4}
{:name "Brite Spot Family Restaurant", :price 2, :category_id 20, :id 5}]
(for [row (take 5 (sort-by :id (driver/table-rows-seq (PrestoDriver.)
(db/select-one 'Database :id (data/id))
(db/select-one 'Table :id (data/id :venues)))))]
(-> (dissoc row :latitude :longitude)
(update :price int)
(update :category_id int)
(update :id int))))
;;; APPLY-PAGE
(expect
......
......@@ -4,7 +4,7 @@
[metabase.query-processor.middleware
[expand :as ql]
[resolve :as resolve]
[source-table :as st]]
[source-table :as source-table]]
[metabase.test
[data :refer :all]
[util :as tu]]
......@@ -30,7 +30,7 @@
resolving the source table and the middleware that resolves the rest
of the expanded query into a single function to make tests more
concise."
(comp resolve/resolve (st/resolve-source-table-middleware identity)))
(comp resolve/resolve (source-table/resolve-source-table-middleware identity)))
(def ^:private field-ph-defaults
{:fk-field-id nil
......@@ -55,12 +55,12 @@
:values []})
(def ^:private price-field-values
{:field-value-id true
:created-at true
:updated-at true
:values [1 2 3 4]
{:field-value-id true
:created-at true
:updated-at true
:values [1 2 3 4]
:human-readable-values {}
:field-id true})
:field-id true})
;; basic rows query w/ filter
(expect
......@@ -91,7 +91,8 @@
:schema-name "PUBLIC"
:table-name "VENUES"
:values price-field-values
:fingerprint {:global {:distinct-count 4}, :type {:type/Number {:min 1, :max 4, :avg 2.03}}}})
:fingerprint {:global {:distinct-count 4}
:type {:type/Number {:min 1, :max 4, :avg 2.03}}}})
:value {:value 1
:field (merge field-defaults
{:field-id true
......@@ -103,7 +104,8 @@
:schema-name "PUBLIC"
:table-name "VENUES"
:values price-field-values
:fingerprint {:global {:distinct-count 4}, :type {:type/Number {:min 1, :max 4, :avg 2.03}}}})}}
:fingerprint {:global {:distinct-count 4}
:type {:type/Number {:min 1, :max 4, :avg 2.03}}}})}}
:join-tables nil}
......@@ -154,7 +156,11 @@
:table-id (id :categories)
:table-name "CATEGORIES__via__CATEGORY_ID"
:values category-field-values
:fingerprint {:global {:distinct-count 75}, :type {:type/Text {:percent-json 0.0, :percent-url 0.0, :percent-email 0.0, :average-length 8.333333333333334}}}})
:fingerprint {:global {:distinct-count 75}
:type {:type/Text {:percent-json 0.0
:percent-url 0.0
:percent-email 0.0
:average-length 8.333333333333334}}}})
:value {:value "abc"
:field (merge field-defaults
{:field-id true
......@@ -166,7 +172,11 @@
:table-id (id :categories)
:table-name "CATEGORIES__via__CATEGORY_ID"
:values category-field-values
:fingerprint {:global {:distinct-count 75}, :type {:type/Text {:percent-json 0.0, :percent-url 0.0, :percent-email 0.0, :average-length 8.333333333333334}}}})}}
:fingerprint {:global {:distinct-count 75}
:type {:type/Text {:percent-json 0.0
:percent-url 0.0
:percent-email 0.0
:average-length 8.333333333333334}}}})}}
:join-tables [{:source-field {:field-id true
:field-name "CATEGORY_ID"}
:pk-field {:field-id true
......@@ -179,8 +189,8 @@
:table-ids #{(id :categories)}}]
(tu/boolean-ids-and-timestamps
(let [expanded-form (ql/expand (wrap-inner-query (query venues
(ql/filter (ql/= $category_id->categories.name
"abc")))))]
(ql/filter (ql/= $category_id->categories.name
"abc")))))]
(mapv obj->map [expanded-form
(resolve' expanded-form)]))))
......@@ -217,7 +227,7 @@
:special-type nil
:table-id (id :users)
:table-name "USERS__via__USER_ID"
:fingerprint {:global {:distinct-count 15}}})
:fingerprint {:global {:distinct-count 11}}})
:unit :year}
:value {:value (u/->Timestamp "1980-01-01")
:field {:field
......@@ -231,7 +241,7 @@
:visibility-type :normal
:table-id (id :users)
:table-name "USERS__via__USER_ID"
:fingerprint {:global {:distinct-count 15}}})
:fingerprint {:global {:distinct-count 11}}})
:unit :year}}}
:join-tables [{:source-field {:field-id (id :checkins :user_id)
:field-name "USER_ID"}
......@@ -259,11 +269,11 @@
:aggregation [{:aggregation-type :sum
:custom-name nil
:field (merge field-ph-defaults
{:field-id true
:fk-field-id (id :checkins :venue_id)})}]
{:field-id true
:fk-field-id (id :checkins :venue_id)})}]
:breakout [(merge field-ph-defaults
{:field-id true
:datetime-unit :day-of-week})]}}
{:field-id true
:datetime-unit :day-of-week})]}}
;; resolved form
{:database (id)
:type :query
......@@ -282,7 +292,8 @@
:fk-field-id (id :checkins :venue_id)
:table-name "VENUES__via__VENUE_ID"
:values price-field-values
:fingerprint {:global {:distinct-count 4}, :type {:type/Number {:min 1, :max 4, :avg 2.03}}}})}]
:fingerprint {:global {:distinct-count 4}
:type {:type/Number {:min 1, :max 4, :avg 2.03}}}})}]
:breakout [{:field (merge field-defaults
{:base-type :type/Date
:table-id (id :checkins)
......@@ -305,8 +316,8 @@
:fk-field-ids #{(id :checkins :venue_id)}
:table-ids #{(id :venues) (id :checkins)}}]
(let [expanded-form (ql/expand (wrap-inner-query (query checkins
(ql/aggregation (ql/sum $venue_id->venues.price))
(ql/breakout (ql/datetime-field $checkins.date :day-of-week)))))]
(ql/aggregation (ql/sum $venue_id->venues.price))
(ql/breakout (ql/datetime-field $checkins.date :day-of-week)))))]
(tu/boolean-ids-and-timestamps
(mapv obj->map [expanded-form
(resolve' expanded-form)]))))
......@@ -115,7 +115,11 @@
:base_type (data/expected-base-type->actual :type/Text)
:name (data/format-name "name")
:display_name "Name"
:fingerprint {:global {:distinct-count 75}, :type {:type/Text {:percent-json 0.0, :percent-url 0.0, :percent-email 0.0, :average-length 8.333}}}})))
:fingerprint {:global {:distinct-count 75}
:type {:type/Text {:percent-json 0.0
:percent-url 0.0
:percent-email 0.0
:average-length 8.333}}}})))
;; #### users
(defn users-col
......@@ -135,13 +139,17 @@
:base_type (data/expected-base-type->actual :type/Text)
:name (data/format-name "name")
:display_name "Name"
:fingerprint {:global {:distinct-count 15}, :type {:type/Text {:percent-json 0.0, :percent-url 0.0, :percent-email 0.0, :average-length 13.267}}}}
:fingerprint {:global {:distinct-count 15}
:type {:type/Text {:percent-json 0.0
:percent-url 0.0
:percent-email 0.0
:average-length 13.267}}}}
:last_login {:special_type nil
:base_type (data/expected-base-type->actual :type/DateTime)
:name (data/format-name "last_login")
:display_name "Last Login"
:unit :default
:fingerprint {:global {:distinct-count 15}}})))
:fingerprint {:global {:distinct-count 11}}})))
;; #### venues
(defn venues-columns
......
......@@ -16,9 +16,11 @@
[metabase.test
[data :refer :all]
[util :as tu]]
[metabase.test.mock.util :as mock-util]
[toucan.db :as db]
[toucan.util.test :as tt]))
(def ^:private ^:const sync-test-tables
{"movie" {:name "movie"
:schema "default"
......@@ -57,20 +59,15 @@
:schema nil}
:dest-column-name "studio"}})))
;; enough values that it won't get marked as a Category, but still get a fingerprint or w/e
;; Mock sample: 500 rows, where row i repeats the value i once per requested field.
;; The driver and table arguments are ignored.
(defn- table-rows-sample [_ _ fields]
(for [i (range 500)]
(repeat (count fields) i)))
(extend SyncTestDriver
driver/IDriver
(merge driver/IDriverDefaultsMixin
{:describe-database describe-database
:describe-table describe-table
:describe-table-fks describe-table-fks
:features (constantly #{:foreign-keys})
:details-fields (constantly [])
:table-rows-sample table-rows-sample}))
{:describe-database describe-database
:describe-table describe-table
:describe-table-fks describe-table-fks
:features (constantly #{:foreign-keys})
:details-fields (constantly [])
:process-query-in-context mock-util/process-query-in-context}))
(driver/register-driver! :sync-test (SyncTestDriver.))
......@@ -98,7 +95,7 @@
:entity_type nil
:entity_name nil
:visibility_type nil
:rows nil
:rows 1000
:active true
:created_at true
:updated_at true})
......
......@@ -3,7 +3,7 @@
This is a `:dynamic-schema` db with `:nested-fields`.
Most notably meant to serve as a representation of a Mongo database."
(:require [metabase.driver :as driver]
[metabase.test.mock.util :refer [table-defaults field-defaults]]))
[metabase.test.mock.util :as mock-util]))
(def ^:private ^:const toucanery-tables
......@@ -54,11 +54,6 @@
[{:keypath "movies.filming.description", :value "If the movie is currently being filmed."}
{:keypath "movies.description", :value "A cinematic adventure."}]))
;; enough so it can get fingerprinted, but not be a category
(defn- table-rows-sample
  "Mock driver implementation: produce 500 synthetic sample rows — row `i` repeats the
   value `i` once for each Field requested. Driver and table arguments are unused."
  [_ _ fields]
  (map #(repeat (count fields) %)
       (range 500)))
(defrecord ToucaneryDriver []
clojure.lang.Named
......@@ -67,77 +62,77 @@
(extend ToucaneryDriver
driver/IDriver
(merge driver/IDriverDefaultsMixin
{:describe-database describe-database
:describe-table describe-table
:features (constantly #{:dynamic-schema :nested-fields})
:details-fields (constantly [])
:table-rows-seq table-rows-seq
:table-rows-sample table-rows-sample}))
{:describe-database describe-database
:describe-table describe-table
:features (constantly #{:dynamic-schema :nested-fields})
:details-fields (constantly [])
:table-rows-seq table-rows-seq
:process-query-in-context mock-util/process-query-in-context}))
(driver/register-driver! :toucanery (ToucaneryDriver.))
(def toucanery-tables-and-fields
[(merge table-defaults
[(merge mock-util/table-defaults
{:name "employees"
:fields [(merge field-defaults
:fields [(merge mock-util/field-defaults
{:name "id"
:display_name "ID"
:base_type :type/Integer
:special_type :type/PK})
(merge field-defaults
(merge mock-util/field-defaults
{:name "name"
:display_name "Name"
:base_type :type/Text
:special_type :type/Name})]
:display_name "Employees"})
(merge table-defaults
(merge mock-util/table-defaults
{:name "transactions"
:fields [(merge field-defaults
:fields [(merge mock-util/field-defaults
{:name "age"
:display_name "Age"
:base_type :type/Integer
:parent_id true})
(merge field-defaults
(merge mock-util/field-defaults
{:name "buyer"
:display_name "Buyer"
:base_type :type/Dictionary})
(merge field-defaults
(merge mock-util/field-defaults
{:name "cc"
:display_name "Cc"
:base_type :type/Text
:parent_id true})
(merge field-defaults
(merge mock-util/field-defaults
{:name "details"
:display_name "Details"
:base_type :type/Dictionary
:parent_id true})
(merge field-defaults
(merge mock-util/field-defaults
{:name "id"
:display_name "ID"
:base_type :type/Integer
:special_type :type/PK})
(merge field-defaults
(merge mock-util/field-defaults
{:name "name"
:display_name "Name"
:base_type :type/Text
:parent_id true
:special_type :type/Name})
(merge field-defaults
(merge mock-util/field-defaults
{:name "name"
:display_name "Name"
:base_type :type/Text
:parent_id true
:special_type :type/Name})
(merge field-defaults
(merge mock-util/field-defaults
{:name "toucan"
:display_name "Toucan"
:base_type :type/Dictionary})
(merge field-defaults
(merge mock-util/field-defaults
{:name "ts"
:display_name "Ts"
:base_type :type/BigInteger
:special_type :type/UNIXTimestampMilliseconds})
(merge field-defaults
(merge mock-util/field-defaults
{:name "weight"
:display_name "Weight"
:base_type :type/Decimal
......
(ns metabase.test.mock.util)
(ns metabase.test.mock.util
(:require [metabase.query-processor :as qp]))
(def table-defaults
{:description nil
......@@ -35,3 +36,29 @@
:visibility_type :normal
:preview_display true
:created_at true})
;; This is just a fake implementation that just swoops in and returns somewhat-correct looking results for different
;; queries we know will get ran as part of sync
(defn- is-table-row-count-query?
  "True if EXPANDED-QUERY's first aggregation clause is a `:count` aggregation — i.e.
   the query sync runs to determine a Table's row count."
  [expanded-query]
  (-> expanded-query
      (get-in [:query :aggregation 0 :aggregation-type])
      (= :count)))
(defn- is-table-sample-query?
  "Truthy (the non-empty `:fields` seq) when EXPANDED-QUERY selects specific Fields —
   i.e. the query sync runs to sample a Table's rows; nil otherwise."
  [expanded-query]
  (let [fields (get-in expanded-query [:query :fields])]
    (seq fields)))
(defn process-query-in-context
  "QP mock that returns 'appropriate' fake answers for the queries we know are run during
   the sync process -- the ones that determine Table row count and row samples (for
   fingerprinting). Currently does nothing for any other queries, including the ones used
   to determine FieldValues."
  [_ _]
  (fn [query]
    (let [expanded (qp/expand query)
          rows     (cond
                     ;; row-count query -> pretend every Table has exactly 1000 rows
                     (is-table-row-count-query? expanded)
                     [[1000]]

                     ;; sample query -> 500 synthetic rows, one value per requested Field.
                     ;; NOTE(review): the field count is read from the *unexpanded* query,
                     ;; while the predicates inspect the expanded one -- presumably
                     ;; intentional; confirm against qp/expand's output shape.
                     (is-table-sample-query? expanded)
                     (let [num-fields (count (get-in query [:query :fields]))]
                       (map #(repeat num-fields %) (range 500)))

                     :else
                     nil)]
      {:data {:rows rows}})))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment