Skip to content
Snippets Groups Projects
Commit 78ad50f1 authored by Cam Saül's avatar Cam Saül
Browse files

In-DB Caching :floppy_disk:

parent 11250505
Branches
Tags
No related merge requests found
Showing
with 535 additions and 83 deletions
......@@ -187,6 +187,34 @@ const SECTIONS = [
getHidden: (settings) => !settings["enable-public-sharing"]
}
]
},
{
name: "Caching",
settings: [
{
key: "enable-query-caching",
display_name: "Enable Caching",
type: "boolean"
},
{
key: "query-caching-min-ttl",
display_name: "Minimum Query Duration",
type: "string",
getHidden: (settings) => !settings["enable-query-caching"]
},
{
key: "query-caching-ttl-ratio",
display_name: "Cache Time-To-Live (TTL)",
type: "string",
getHidden: (settings) => !settings["enable-query-caching"]
},
{
key: "query-caching-max-kb",
display_name: "Max Cache Entry Size",
type: "string",
getHidden: (settings) => !settings["enable-query-caching"]
}
]
}
];
for (const section of SECTIONS) {
......
......@@ -49,6 +49,7 @@
[com.mattbertolini/liquibase-slf4j "2.0.0"] ; Java Migrations lib
[com.mchange/c3p0 "0.9.5.2"] ; connection pooling library
[com.novemberain/monger "3.1.0"] ; MongoDB Driver
[com.taoensso/nippy "2.12.2"] ; Fast binary serialization (with Snappy/LZ4 compression) library for Clojure
[compojure "1.5.2"] ; HTTP Routing library built on Ring
[environ "1.1.0"] ; easy environment management
[hiccup "1.0.5"] ; HTML templating
......
databaseChangeLog:
- property:
name: blob.type
value: blob
dbms: mysql,h2
- property:
name: blob.type
value: bytea
dbms: postgresql
- changeSet:
id: 51
author: camsaul
changes:
- createTable:
tableName: query_cache
remarks: 'Cached results of queries are stored here when using the DB-based query cache.'
columns:
- column:
name: query_hash
type: binary(32)
remarks: 'The hash of the query dictionary. (This is a 256-bit SHA3 hash of the query dict).'
constraints:
primaryKey: true
nullable: false
- column:
name: updated_at
type: datetime
remarks: 'The timestamp of when these query results were last refreshed.'
constraints:
nullable: false
- column:
name: results
type: ${blob.type}
remarks: 'Cached, compressed results of running the query with the given hash.'
constraints:
nullable: false
- createIndex:
tableName: query_cache
indexName: idx_query_cache_updated_at
columns:
column:
name: updated_at
- addColumn:
tableName: report_card
columns:
- column:
name: cache_ttl
type: int
remarks: 'The maximum time, in seconds, to return cached results for this Card rather than running a new query.'
(ns metabase.api.card
(:require [clojure.data :as data]
[clojure.tools.logging :as log]
[cheshire.core :as json]
[compojure.core :refer [GET POST DELETE PUT]]
[schema.core :as s]
......@@ -20,8 +21,11 @@
[permissions :as perms]
[table :refer [Table]]
[view-log :refer [ViewLog]])
(metabase [query-processor :as qp]
[util :as u])
[metabase.public-settings :as public-settings]
[metabase.query-processor :as qp]
[metabase.query-processor.middleware.cache :as cache]
[metabase.query-processor.util :as qputil]
[metabase.util :as u]
[metabase.util.schema :as su])
(:import java.util.UUID))
......@@ -345,15 +349,33 @@
;;; ------------------------------------------------------------ Running a Query ------------------------------------------------------------
(defn- query-magic-ttl
  "Compute a 'magic' cache TTL time (in seconds) for QUERY by multiplying its historic average execution times by the `query-caching-ttl-ratio`.
   If the TTL is less than a second, this returns `nil` (i.e., the cache should not be utilized.)"
  [query]
  ;; no historic executions -> no average -> `when-let` yields nil and caching is skipped
  (when-let [average-duration (qputil/query-average-duration query)]
    ;; average-duration is in milliseconds; divide by 1000 to get a TTL in seconds
    (let [ttl-seconds (Math/round (float (/ (* average-duration (public-settings/query-caching-ttl-ratio))
                                            1000)))]
      (when-not (zero? ttl-seconds)
        (log/info (format "Question's average execution duration is %d ms; using 'magic' TTL of %d seconds" (Math/round average-duration) ttl-seconds) (u/emoji "💾"))
        ttl-seconds))))
(defn- query-for-card
  "Build the query to run for CARD, attaching PARAMETERS and CONSTRAINTS.
   When caching is enabled, also attach a `:cache-ttl` -- the Card's explicit `:cache_ttl` if set,
   otherwise a 'magic' TTL derived from the Card's average execution time (may be `nil`)."
  [card parameters constraints]
  (let [query (-> (:dataset_query card)
                  (assoc :constraints constraints)
                  (assoc :parameters  parameters))]
    (assoc query :cache-ttl (when (public-settings/enable-query-caching)
                              (or (:cache_ttl card)
                                  (query-magic-ttl query))))))
(defn run-query-for-card
"Run the query for Card with PARAMETERS and CONSTRAINTS, and return results in the usual format."
[card-id & {:keys [parameters constraints]
:or {constraints dataset-api/default-query-constraints}}]
{:pre [(u/maybe? sequential? parameters)]}
(let [card (read-check Card card-id)
query (assoc (:dataset_query card)
:parameters parameters
:constraints constraints)
query (query-for-card card parameters constraints)
options {:executed-by *current-user-id*
:card-id card-id}]
(check-not-archived card)
......@@ -361,20 +383,24 @@
(defendpoint POST "/:card-id/query"
"Run the query associated with a Card."
[card-id :as {{:keys [parameters]} :body}]
(run-query-for-card card-id, :parameters parameters))
[card-id :as {{:keys [parameters ignore_cache], :or {ignore_cache false}} :body}]
{ignore_cache (s/maybe s/Bool)}
(binding [cache/*ignore-cached-results* ignore_cache]
(run-query-for-card card-id, :parameters parameters)))
(defendpoint POST "/:card-id/query/csv"
"Run the query associated with a Card, and return its results as CSV. Note that this expects the parameters as serialized JSON in the 'parameters' parameter"
[card-id parameters]
{parameters (s/maybe su/JSONString)}
(dataset-api/as-csv (run-query-for-card card-id, :parameters (json/parse-string parameters keyword), :constraints nil)))
(binding [cache/*ignore-cached-results* true]
(dataset-api/as-csv (run-query-for-card card-id, :parameters (json/parse-string parameters keyword), :constraints nil))))
(defendpoint POST "/:card-id/query/json"
"Run the query associated with a Card, and return its results as JSON. Note that this expects the parameters as serialized JSON in the 'parameters' parameter"
[card-id parameters]
{parameters (s/maybe su/JSONString)}
(dataset-api/as-json (run-query-for-card card-id, :parameters (json/parse-string parameters keyword), :constraints nil)))
(binding [cache/*ignore-cached-results* true]
(dataset-api/as-json (run-query-for-card card-id, :parameters (json/parse-string parameters keyword), :constraints nil))))
;;; ------------------------------------------------------------ Sharing is Caring ------------------------------------------------------------
......
......@@ -7,10 +7,10 @@
(toucan [db :as db]
[hydrate :refer [hydrate]])
(metabase.models [card :refer [Card]]
[database :refer [Database]]
[query-execution :refer [QueryExecution]])
(metabase [query-processor :as qp]
[util :as u])
[database :refer [Database]])
[metabase.query-processor :as qp]
[metabase.query-processor.util :as qputil]
[metabase.util :as u]
[metabase.util.schema :as su]))
(def ^:private ^:const max-results-bare-rows
......@@ -38,16 +38,8 @@
"Get historical query execution duration."
[:as {{:keys [database] :as query} :body}]
(read-check Database database)
;; add sensible constraints for results limits on our query
(let [query (assoc query :constraints default-query-constraints)
running-times (db/select-field :running_time QueryExecution
:query_hash (hash query)
{:order-by [[:started_at :desc]]
:limit 10})]
{:average (if (empty? running-times)
0
(float (/ (reduce + running-times)
(count running-times))))}))
{:average (or (qputil/query-average-duration query)
0)})
(defn as-csv
"Return a CSV response containing the RESULTS of a query."
......
......@@ -35,8 +35,8 @@
(defendpoint GET "/"
"Fetch *all* `Metrics`."
[id]
(filter mi/can-read? (-> (db/select Metric, :is_active true)
(hydrate :creator))))
(filter mi/can-read? (-> (db/select Metric, :is_active true, {:order-by [:%lower.name]})
(hydrate :creator))))
(defendpoint PUT "/:id"
......
......@@ -29,7 +29,8 @@
:mb-version-info-url "http://static.metabase.com/version-info.json"
:max-session-age "20160" ; session length in minutes (14 days)
:mb-colorize-logs "true"
:mb-emoji-in-logs "true"})
:mb-emoji-in-logs "true"
:mb-qp-cache-backend "db"})
(defn config-str
......
......@@ -6,7 +6,8 @@
[metabase.util :as u])
(:import javax.mail.Session))
;; ## CONFIG
;;; CONFIG
;; TODO - smtp-port should be switched to type :integer
(defsetting email-from-address "Email address you want to use as the sender of Metabase." :default "notifications@metabase.com")
(defsetting email-smtp-host "The address of the SMTP server that handles your emails.")
......
(ns metabase.models.interface
(:require [clojure.core.memoize :as memoize]
[cheshire.core :as json]
[taoensso.nippy :as nippy]
[toucan.models :as models]
[metabase.config :as config]
[metabase.util :as u]
[metabase.util.encryption :as encryption]))
[metabase.util.encryption :as encryption])
(:import java.sql.Blob))
;;; ------------------------------------------------------------ Toucan Extensions ------------------------------------------------------------
(models/set-root-namespace! 'metabase.models)
;;; types
(defn- json-in [obj]
(if (string? obj)
obj
......@@ -39,6 +44,24 @@
:in encrypted-json-in
:out (comp cached-encrypted-json-out u/jdbc-clob->str))
(defn compress
  "Compress OBJ, returning a byte array.
   Serializes OBJ with nippy using the Snappy compressor; reversed by `decompress`."
  [obj]
  (nippy/freeze obj {:compressor nippy/snappy-compressor}))
(defn decompress
  "Decompress COMPRESSED-BYTES (as produced by `compress`) back into a Clojure object.
   Some JDBC drivers return blob columns as `java.sql.Blob`s; those are first converted to byte arrays."
  [compressed-bytes]
  (if (instance? Blob compressed-bytes)
    ;; NOTE: java.sql.Blob positions are 1-based per the JDBC spec, so extraction must start at 1, not 0
    (recur (.getBytes ^Blob compressed-bytes 1 (.length ^Blob compressed-bytes)))
    (nippy/thaw compressed-bytes {:compressor nippy/snappy-compressor})))
(models/add-type! :compressed
:in compress
:out decompress)
;;; properties
(defn- add-created-at-timestamp [obj & _]
(assoc obj :created_at (u/new-sql-timestamp)))
......@@ -50,6 +73,11 @@
:insert (comp add-created-at-timestamp add-updated-at-timestamp)
:update add-updated-at-timestamp)
;; like `timestamped?`, but for models that only have an `:updated_at` column
(models/add-property! :updated-at-timestamped?
:insert add-updated-at-timestamp
:update add-updated-at-timestamp)
;;; ------------------------------------------------------------ New Permissions Stuff ------------------------------------------------------------
......
(ns metabase.models.query-cache
  "A model used to cache query results in the database."
  (:require [toucan.models :as models]
            [metabase.util :as u]))

;; Backing model for the `query_cache` table used by the `:db` query-processor cache backend
;; (see `metabase.query-processor.middleware.cache-backend.db`).
(models/defmodel QueryCache :query_cache)

(u/strict-extend (class QueryCache)
  models/IModel
  (merge models/IModelDefaults
         ;; `:results` is stored compressed (the `:compressed` type defined in `metabase.models.interface`);
         ;; `:updated-at-timestamped?` keeps `updated_at` current on every write so freshness checks work.
         {:types      (constantly {:results :compressed})
          :properties (constantly {:updated-at-timestamped? true})}))
......@@ -52,7 +52,7 @@
(def ^:private Type
(s/enum :string :boolean :json))
(s/enum :string :boolean :json :integer))
(def ^:private SettingDefinition
{:name s/Keyword
......@@ -151,6 +151,12 @@
^Boolean [setting-or-name]
(string->boolean (get-string setting-or-name)))
(defn get-integer
  "Get integer value of (presumably `:integer`) SETTING-OR-NAME. This is the default getter for `:integer` settings."
  ^Integer [setting-or-name]
  ;; `some->` short-circuits to nil when the setting has no string value
  (some-> (get-string setting-or-name)
          (Integer/parseInt)))
(defn get-json
"Get the string value of SETTING-OR-NAME and parse it as JSON."
[setting-or-name]
......@@ -159,6 +165,7 @@
(def ^:private default-getter-for-type
{:string get-string
:boolean get-boolean
:integer get-integer
:json get-json})
(defn get
......@@ -204,6 +211,15 @@
false "false"
nil nil))))
(defn set-integer!
  "Set the value of integer SETTING-OR-NAME. NEW-VALUE may be an integer, a string of digits, or `nil` (to unset the value)."
  [setting-or-name new-value]
  (set-string! setting-or-name (when new-value
                                 ;; give the assertion a message so invalid input fails with something more useful
                                 ;; than a bare AssertionError.
                                 ;; NOTE(review): negative *integers* pass this check but negative *strings* (e.g. "-5")
                                 ;; do not -- confirm whether negative values should be accepted at all.
                                 (assert (or (integer? new-value)
                                             (and (string? new-value)
                                                  (re-matches #"^\d+$" new-value)))
                                   (str "Invalid value for integer Setting: " (pr-str new-value)))
                                 (str new-value))))
(defn set-json!
"Serialize NEW-VALUE for SETTING-OR-NAME as a JSON string and save it."
[setting-or-name new-value]
......@@ -213,6 +229,7 @@
(def ^:private default-setter-for-type
{:string set-string!
:boolean set-boolean!
:integer set-integer!
:json set-json!})
(defn set!
......@@ -300,7 +317,7 @@
You may optionally pass any of the OPTIONS below:
* `:default` - The default value of the setting. (default: `nil`)
* `:type` - `:string` (default), `:boolean`, or `:json`. Non-`:string` settings have special default getters and setters that automatically coerce values to the correct types.
* `:type` - `:string` (default), `:boolean`, `:integer`, or `:json`. Non-`:string` settings have special default getters and setters that automatically coerce values to the correct types.
* `:internal?` - This `Setting` is for internal use and shouldn't be exposed in the UI (i.e., not
returned by the corresponding endpoints). Default: `false`
* `:getter` - A custom getter fn, which takes no arguments. Overrides the default implementation.
......
......@@ -46,6 +46,40 @@
:type :boolean
:default false)
(defsetting enable-query-caching
"Enabling caching will save the results of queries that take a long time to run."
:type :boolean
:default false)
(defsetting query-caching-max-kb
"The maximum size of the cache per card, in kilobytes:"
;; (This size is a measurement of the length of *uncompressed* serialized result *rows*. The actual size of
;; the results as stored will vary somewhat, since this measurement doesn't include metadata returned with the
;; results, and doesn't consider whether the results are compressed, as the `:db` backend does.)
:type :integer
:default 1000)
;; Global upper bound on cache-entry age; entries older than this are purged by the `:db` backend on every write.
(defsetting query-caching-max-ttl
  ;; fixed typo in user-visible description: "absoulte" -> "absolute"
  "The absolute maximum time to keep any cached query results, in seconds."
  :type    :integer
  :default (* 60 60 24 100)) ; 100 days
(defsetting query-caching-min-ttl
"Metabase will cache all saved questions with an average query execution time longer than
this many seconds:"
:type :integer
:default 60)
(defsetting query-caching-ttl-ratio
"To determine how long each saved question's cached result should stick around, we take the
query's average execution time and multiply that by whatever you input here. So if a query
takes on average 2 minutes to run, and you input 10 for your multiplier, its cache entry
will persist for 20 minutes."
:type :integer
:default 10)
(defn remove-public-uuid-if-public-sharing-is-disabled
"If public sharing is *disabled* and OBJECT has a `:public_uuid`, remove it so people don't try to use it (since it won't work).
Intended for use as part of a `post-select` implementation for Cards and Dashboards."
......@@ -70,21 +104,25 @@
(defn public-settings
"Return a simple map of key/value pairs which represent the public settings (`MetabaseBootstrap`) for the front-end application."
[]
{:admin_email (admin-email)
:anon_tracking_enabled (anon-tracking-enabled)
:custom_geojson (setting/get :custom-geojson)
:email_configured ((resolve 'metabase.email/email-configured?))
:engines ((resolve 'metabase.driver/available-drivers))
:ga_code "UA-60817802-1"
:google_auth_client_id (setting/get :google-auth-client-id)
:has_sample_dataset (db/exists? 'Database, :is_sample true)
:map_tile_server_url (map-tile-server-url)
:password_complexity password/active-password-complexity
:public_sharing (enable-public-sharing)
:report_timezone (setting/get :report-timezone)
:setup_token ((resolve 'metabase.setup/token-value))
:site_name (site-name)
:timezone_short (short-timezone-name (setting/get :report-timezone))
:timezones common/timezones
:types (types/types->parents)
:version config/mb-version-info})
{:admin_email (admin-email)
:anon_tracking_enabled (anon-tracking-enabled)
:custom_geojson (setting/get :custom-geojson)
:email_configured ((resolve 'metabase.email/email-configured?))
:enable_query_caching (enable-query-caching)
:engines ((resolve 'metabase.driver/available-drivers))
:ga_code "UA-60817802-1"
:google_auth_client_id (setting/get :google-auth-client-id)
:has_sample_dataset (db/exists? 'Database, :is_sample true)
:map_tile_server_url (map-tile-server-url)
:password_complexity password/active-password-complexity
:public_sharing (enable-public-sharing)
:query-caching-max-kb (query-caching-max-kb)
:query-caching-min-ttl (query-caching-min-ttl)
:query-caching-ttl-ratio (query-caching-ttl-ratio)
:report_timezone (setting/get :report-timezone)
:setup_token ((resolve 'metabase.setup/token-value))
:site_name (site-name)
:timezone_short (short-timezone-name (setting/get :report-timezone))
:timezones common/timezones
:types (types/types->parents)
:version config/mb-version-info})
......@@ -10,6 +10,7 @@
[add-settings :as add-settings]
[annotate-and-sort :as annotate-and-sort]
[catch-exceptions :as catch-exceptions]
[cache :as cache]
[cumulative-aggregations :as cumulative-ags]
[dev :as dev]
[driver-specific :as driver-specific]
......@@ -77,8 +78,9 @@
driver-specific/process-query-in-context ; (drivers can inject custom middleware if they implement IDriver's `process-query-in-context`)
add-settings/add-settings
resolve-driver/resolve-driver ; ▲▲▲ DRIVER RESOLUTION POINT ▲▲▲ All functions *above* will have access to the driver during PRE- *and* POST-PROCESSING
catch-exceptions/catch-exceptions
log-query/log-initial-query)
log-query/log-initial-query
cache/maybe-return-cached-results
catch-exceptions/catch-exceptions)
query))
;; ▲▲▲ PRE-PROCESSING ▲▲▲ happens from BOTTOM-TO-TOP, e.g. the results of `expand-macros` are (eventually) passed to `expand-resolve`
......@@ -98,14 +100,9 @@
;;; +----------------------------------------------------------------------------------------------------+
(defn- save-query-execution!
"Save (or update) a `QueryExecution`."
[{:keys [id], :as query-execution}]
(if id
;; execution has already been saved, so update it
(u/prog1 query-execution
(db/update! QueryExecution id query-execution))
;; first time saving execution, so insert it
(db/insert! QueryExecution query-execution)))
"Save a `QueryExecution`."
[query-execution]
(db/insert! QueryExecution query-execution))
(defn- save-and-return-failed-query!
"Save QueryExecution state and construct a failed query response"
......@@ -130,18 +127,22 @@
(defn- save-and-return-successful-query!
"Save QueryExecution state and construct a completed (successful) query response"
[query-execution query-result]
;; record our query execution and format response
(-> (assoc query-execution
:status :completed
:finished_at (u/new-sql-timestamp)
:running_time (- (System/currentTimeMillis)
(:start_time_millis query-execution))
:result_rows (get query-result :row_count 0))
(dissoc :start_time_millis)
save-query-execution!
;; at this point we've saved and we just need to massage things into our final response format
(dissoc :error :raw_query :result_rows :version)
(merge query-result)))
(let [query-execution (-> (assoc query-execution
:status :completed
:finished_at (if (:cached? query-result)
(:updated-at query-result)
(u/new-sql-timestamp))
:running_time (- (System/currentTimeMillis)
(:start_time_millis query-execution))
:result_rows (get query-result :row_count 0))
(dissoc :start_time_millis))]
;; only insert a new record into QueryExecution if the results *were not* cached (i.e., only if a Query was actually ran)
(-> (if (:cached? query-result)
query-execution
(save-query-execution! query-execution))
;; ok, now return the results in the normal response format
(dissoc :error :raw_query :result_rows :version)
(merge query-result))))
(defn- assert-query-status-successful
......@@ -211,10 +212,9 @@
*allow-queries-with-no-executor-id*)
(u/maybe? integer? card-id)]}
(let [query-uuid (str (java.util.UUID/randomUUID))
query-hash (hash query)
query (assoc query :info {:executed-by executed-by
:card-id card-id
:uuid query-uuid
:query-hash query-hash
:query-hash (qputil/query-hash query)
:query-type (if (qputil/mbql-query? query) "MBQL" "native")})]
(run-and-save-query! query)))
(ns metabase.query-processor.middleware.cache
  "Middleware that returns cached results for queries when applicable.

   If caching is enabled (`enable-query-caching` is `true`) cached results will be returned for Cards if possible.
   Individual Cards can specify a custom TTL with a value for `:cache_ttl`; otherwise a 'magic' TTL is derived from the
   Card's average execution time and the `query-caching-ttl-ratio` Setting (see `metabase.api.card`).

   For all other queries, caching is skipped.

   Various caching backends are defined in `metabase.query-processor.middleware.cache-backend` namespaces.
   The default backend is `db`, which uses the application database; this value can be changed by setting the env var
   `MB_QP_CACHE_BACKEND`.

   Refer to `metabase.query-processor.middleware.cache-backend.interface` for more details about how the cache backends themselves."
  (:require [clojure.tools.logging :as log]
            [metabase.config :as config]
            [metabase.public-settings :as public-settings]
            [metabase.query-processor.middleware.cache-backend.interface :as i]
            [metabase.query-processor.util :as qputil]
            [metabase.util :as u]))
(def ^:dynamic ^Boolean *ignore-cached-results*
"Should we force the query to run, ignoring cached results even if they're available?
Setting this to `true` will run the query again and will still save the updated results."
false)
;;; ------------------------------------------------------------ Backend ------------------------------------------------------------
(def ^:private backend-instance
(atom nil))
(defn- set-backend!
  "Set the cache backend to the cache defined by the keyword BACKEND.

   (This should be something like `:db`, `:redis`, or `:memcached`. See the
   documentation in `metabase.query-processor.middleware.cache-backend.interface` for details on how this works.)"
  ([]
   (set-backend! (config/config-kw :mb-qp-cache-backend)))
  ([backend]
   (let [backend-ns (symbol (str "metabase.query-processor.middleware.cache-backend." (munge (name backend))))]
     (require backend-ns)
     (log/info "Using query processor cache backend:" (u/format-color 'blue backend) (u/emoji "💾"))
     (let [instance (ns-resolve backend-ns 'instance)]
       (assert instance
         (str "No var named 'instance' found in namespace " backend-ns))
       ;; BUGFIX: the message was built with `str` but contained a `%s` placeholder, producing a garbled
       ;; assertion message like "%s/instance doesn't satisfy ...<ns>" -- use `format` instead.
       (assert (extends? i/IQueryProcessorCacheBackend (class @instance))
         (format "%s/instance doesn't satisfy IQueryProcessorCacheBackend" backend-ns))
       (reset! backend-instance @instance)))))
;;; ------------------------------------------------------------ Cache Operations ------------------------------------------------------------
(defn- cached-results
  "Fetch cached results for QUERY-HASH from the current backend, if present and no older than MAX-AGE-SECONDS.
   Returns `nil` when `*ignore-cached-results*` is bound to `true` or when the backend has no fresh entry.
   Hits are tagged with `:cached? true` so downstream code can distinguish them from a live run."
  [query-hash max-age-seconds]
  (when-not *ignore-cached-results*
    (when-let [results (i/cached-results @backend-instance query-hash max-age-seconds)]
      ;; backends are required to report when the query was last actually ran
      (assert (u/is-temporal? (:updated-at results))
        "cached-results should include an `:updated-at` field containing the date when the query was last ran.")
      (log/info "Returning cached results for query" (u/emoji "💾"))
      (assoc results :cached? true))))
(defn- save-results!
  "Hand RESULTS for QUERY-HASH off to the current cache backend for storage."
  [query-hash results]
  (log/info "Caching results for next time for query" (u/emoji "💾"))
  (i/save-results! @backend-instance query-hash results))
;;; ------------------------------------------------------------ Middleware ------------------------------------------------------------
(defn- is-cacheable?
  "True when caching is globally enabled *and* this query carries a (non-nil) `:cache-ttl`."
  ^Boolean [query]
  (boolean (when (public-settings/enable-query-caching)
             (:cache-ttl query))))
(defn- results-are-below-max-byte-threshold?
  "Measure the size of the `:rows` in QUERY-RESULTS and see whether they're smaller than `query-caching-max-kb`
   *before* compression."
  ^Boolean [{{rows :rows} :data}]
  (let [max-bytes (* (public-settings/query-caching-max-kb) 1024)]
    ;; We don't want to serialize the entire result set since that could explode if the query is one that returns a
    ;; huge number of rows. (We also want to keep `:rows` lazy.)
    ;; So we'll serialize one row at a time, and keep a running total of bytes; if we pass the `query-caching-max-kb`
    ;; threshold, we'll fail right away.
    ;; NOTE(review): `(count (str row))` counts *characters*, not bytes, so rows containing multi-byte characters
    ;; are undercounted -- probably acceptable for a rough threshold, but confirm.
    (loop [total-bytes 0, [row & more] rows]
      (cond
        (> total-bytes max-bytes) false   ; over budget -> not cacheable
        (not row)                 true    ; exhausted all rows under budget -> cacheable
        :else                     (recur (+ total-bytes (count (str row)))
                                         more)))))
(defn- save-results-if-successful!
  "Cache RESULTS under QUERY-HASH, but only if the query completed successfully and its rows fit under the size cap."
  [query-hash results]
  ;; `log/info` returns nil, so when the results are too large the `or` is falsey, the `when` short-circuits,
  ;; and we get a log line explaining why caching was skipped.
  (when (and (= (:status results) :completed)
             (or (results-are-below-max-byte-threshold? results)
                 (log/info "Results are too large to cache." (u/emoji "😫"))))
    (save-results! query-hash results)))
(defn- run-query-and-save-results-if-successful!
  "Run QUERY through QP, then cache the results under QUERY-HASH if the query took at least
   `query-caching-min-ttl` seconds to run (faster queries aren't worth caching). Returns the live results either way."
  [query-hash qp query]
  (let [start-time-ms (System/currentTimeMillis)
        results       (qp query)
        total-time-ms (- (System/currentTimeMillis) start-time-ms)
        min-ttl-ms    (* (public-settings/query-caching-min-ttl) 1000)]
    ;; fixed typo in log message: "miminum" -> "minimum"
    (log/info (format "Query took %d ms to run; minimum for cache eligibility is %d ms" total-time-ms min-ttl-ms))
    (when (>= total-time-ms min-ttl-ms)
      (save-results-if-successful! query-hash results))
    results))
(defn- run-query-with-cache
  "Return fresh-enough cached results for QUERY if available; otherwise run it (and possibly cache the outcome)."
  [qp query]
  (let [hash-key (qputil/secure-query-hash query)
        hit      (cached-results hash-key (:cache-ttl query))]
    (if hit
      hit
      (run-query-and-save-results-if-successful! hash-key qp query))))
(defn maybe-return-cached-results
  "Middleware for caching results of a query if applicable.
   In order for a query to be eligible for caching:

   *  Caching (the `enable-query-caching` Setting) must be enabled
   *  The query must pass a `:cache-ttl` value. For Cards, this can be the value of `:cache_ttl`,
      otherwise falling back to a 'magic' TTL derived from the Card's average execution time and the
      `query-caching-ttl-ratio` Setting (see `metabase.api.card`).
   *  The query must already be permissions-checked. Since the cache bypasses the normal
      query processor pipeline, the ad-hoc permissions-checking middleware isn't applied for cached results.
      (The various `/api/card/` endpoints that make use of caching do `can-read?` checks for the Card *before*
      running the query, satisfying this requirement.)
   *  The result *rows* of the query must be less than `query-caching-max-kb` when serialized (before compression)."
  [qp]
  ;; choose the caching backend if needed
  (when-not @backend-instance
    (set-backend!))
  ;; ok, now do the normal middleware thing
  (fn [query]
    (if-not (is-cacheable? query)
      (qp query)
      (run-query-with-cache qp query))))
(ns metabase.query-processor.middleware.cache-backend.db
(:require [toucan.db :as db]
(metabase.models [interface :as models]
[query-cache :refer [QueryCache]])
[metabase.public-settings :as public-settings]
[metabase.query-processor.middleware.cache-backend.interface :as i]
[metabase.util :as u]))
(defn- cached-results
  "Return cached results for QUERY-HASH if they exist and are newer than MAX-AGE-SECONDS."
  [query-hash max-age-seconds]
  ;; Toucan applies the `:compressed` type's `:out` fn when selecting `:results` (see `metabase.models.query-cache`),
  ;; so `results` arrives here already decompressed back into a Clojure map.
  (when-let [{:keys [results updated_at]} (db/select-one [QueryCache :results :updated_at]
                                            :query_hash query-hash
                                            :updated_at [:>= (u/->Timestamp (- (System/currentTimeMillis)
                                                                               (* 1000 max-age-seconds)))])]
    ;; tack on the timestamp so the middleware can report when the query was last actually ran
    (assoc results :updated-at updated_at)))
(defn- purge-old-cache-entries!
  "Delete any cache entries that are older than the global max age `query-caching-max-ttl` Setting (100 days by default)."
  []
  (db/simple-delete! QueryCache
    :updated_at [:<= (u/->Timestamp (- (System/currentTimeMillis)
                                       (* 1000 (public-settings/query-caching-max-ttl))))]))
(defn- save-results!
  "Save the RESULTS of query with QUERY-HASH, updating an existing QueryCache entry
   if one already exists, otherwise creating a new entry."
  [query-hash results]
  ;; piggyback eviction of over-age entries on every write so stale rows don't accumulate
  (purge-old-cache-entries!)
  ;; `db/update-where!` is falsey when no row matched, in which case we insert a new entry instead
  (or (db/update-where! QueryCache {:query_hash query-hash}
        :updated_at (u/new-sql-timestamp)
        :results    (models/compress results)) ; have to manually call these here since Toucan doesn't call type conversion fns for update-where! (yet)
      (db/insert! QueryCache
        :query_hash query-hash
        :results    results))
  :ok)
(def instance
"Implementation of `IQueryProcessorCacheBackend` that uses the database for caching results."
(reify i/IQueryProcessorCacheBackend
(cached-results [_ query-hash max-age-seconds] (cached-results query-hash max-age-seconds))
(save-results! [_ query-hash results] (save-results! query-hash results))))
(ns metabase.query-processor.middleware.cache-backend.interface
"Interface used to define different Query Processor cache backends.
Defining a backend is straightforward: define a new namespace with the pattern
metabase.query-processor.middleware.cache-backend.<backend>
Where backend is a key representing the backend, e.g. `db`, `redis`, or `memcached`.
In that namespace, create an object that reifies (or otherwise implements) `IQueryProcessorCacheBackend`.
This object *must* be stored in a var called `instance`.
That's it. See `metabase.query-processor.middleware.cache-backend.db` for a complete example of how this is done.")
(defprotocol IQueryProcessorCacheBackend
  "Protocol that different Metabase cache backends must implement.

   QUERY-HASH as passed below is a byte-array representing a 256-bit (32-byte) SHA3 hash; encode this as needed for use as a
   cache entry key. RESULTS are passed (and should be returned) as a Clojure object, and individual backends are free
   to encode this as appropriate when storing the results. (It's probably not a bad idea to compress the results; this
   is what the `:db` backend does.)"

  (cached-results [this, query-hash, ^Integer max-age-seconds]
    "Return cached results for the query with byte array QUERY-HASH if those results are present in the cache and are less
     than MAX-AGE-SECONDS old. Otherwise, return `nil`.

     This method must also return a Timestamp from when the query was last ran. This must be `assoc`ed with the query results
     under the key `:updated-at`.

       (cached-results [_ query-hash max-age-seconds]
         (when-let [[results updated-at] (maybe-fetch-results query-hash max-age-seconds)]
           (assoc results :updated-at updated-at)))")

  (save-results! [this query-hash results]
    "Add a cache entry with the RESULTS of running query with byte array QUERY-HASH.
     This should replace any prior entries for QUERY-HASH and update the cache timestamp to the current system time.
     (This is also an appropriate point to purge any entries older than the value of the `query-caching-max-ttl` Setting.)"))
(ns metabase.query-processor.util
"Utility functions used by the global query processor and middleware functions.")
"Utility functions used by the global query processor and middleware functions."
(:require [buddy.core.hash :as hash]
[toucan.db :as db]
[metabase.models.query-execution :refer [QueryExecution]]
[metabase.util :as u]))
(defn mbql-query?
"Is the given query an MBQL query?"
......@@ -25,3 +29,41 @@
See documentation for `mbql->native` and [issue #2386](https://github.com/metabase/metabase/issues/2386) for more information."
^String [{{:keys [executed-by uuid query-hash query-type], :as info} :info}]
(format "Metabase:: userID: %s executionID: %s queryType: %s queryHash: %s" executed-by uuid query-type query-hash))
;;; ------------------------------------------------------------ Hashing & Historic Execution Times ------------------------------------------------------------
;; There are two ways we hash queries: the O.G. `query-hash` function which is basically a thin wrapper around `clojure.core/hash`, and a cryptographically-secure
;; SHA3 256-bit `secure-query-hash`.
;; The former is used in the remarks that are appended to queries and in recording QueryExecutions;
;; the latter is used as a key for queries for caching results.
;; Eventually, we'll move towards using `secure-query-hash` for everything, but first we'll have to migrate the `QueryExecution` table, which is challenging
because the number of rows in that table runs into the multimillions on larger instances.
(defn- select-keys-for-hashing
  "Return QUERY with only the keys relevant to hashing kept.
   (This is done so irrelevant info or options that don't affect query results doesn't result in the same query producing different hashes.)"
  [query]
  {:pre [(map? query)]}
  ;; key order is deliberately fixed -- `secure-query-hash` hashes the `str` of this map
  (let [relevant-keys [:database :type :query :parameters :constraints]]
    (select-keys query relevant-keys)))
(defn query-hash
  "A non-cryptographic hash of QUERY, returned as an Integer.
   Used for query remarks and `QueryExecution` lookups (see `query-average-duration`);
   *not* used as the results-cache key -- that's `secure-query-hash`."
  ^Integer [query]
  (hash (select-keys-for-hashing query)))
(defn secure-query-hash
  "Return a 256-bit SHA3 hash of QUERY as a key for the cache. (This is returned as a byte array.)"
  [query]
  ;; NOTE(review): this hashes the printed representation of the map, so it relies on a stable print order
  ;; of the small map returned by `select-keys-for-hashing` -- confirm this stays deterministic if the
  ;; set of hashed keys ever grows.
  (hash/sha3-256 (str (select-keys-for-hashing query))))
(defn query-average-duration
  "Return the average running time of QUERY over the last 10 executions in milliseconds.
   Returns `nil` if there's no available data."
  ^Float [query]
  ;; `db/select-field` returns nil (not an empty set) when no rows match, so `when-let` guards the division
  (when-let [running-times (db/select-field :running_time QueryExecution
                             :query_hash (query-hash query)
                             {:order-by [[:started_at :desc]]
                              :limit    10})]
    (float (/ (reduce + running-times)
              (count running-times)))))
......@@ -36,7 +36,8 @@
:display "scalar"
:made_public_by_id nil
:public_uuid nil
:query_type "query"})
:query_type "query"
:cache_ttl nil})
;; ## GET /api/card
;; Filter cards by database
......@@ -154,7 +155,7 @@
;; Test that we can make a card
(let [card-name (random-name)]
(tt/expect-with-temp [Database [{database-id :id}]
Table [{table-id :id} {:db_id database-id}]]
Table [{table-id :id} {:db_id database-id}]]
(merge card-defaults
{:name card-name
:creator_id (user->id :rasta)
......
......@@ -8,20 +8,25 @@
;; Check to make sure we're migrating all of our entities.
;; This fetches the `metabase.cmd.load-from-h2/entities` and compares it all existing entities
(defn- migrated-entity-names []
(defn- migrated-model-names []
(set (map :name @(resolve 'metabase.cmd.load-from-h2/entities))))
(defn- all-entity-names []
(def ^:private models-to-exclude
"Models that should *not* be migrated in `load-from-h2`."
#{"QueryCache"})
(defn- all-model-names []
(set (for [ns (ns-find/find-namespaces (classpath/classpath))
:when (or (re-find #"^metabase\.models\." (name ns))
(= (name ns) "metabase.db.migrations"))
:when (not (re-find #"test" (name ns)))
[_ varr] (do (require ns)
(ns-interns ns))
:let [entity (var-get varr)]
:when (models/model? entity)]
(:name entity))))
:let [{model-name :name, :as model} (var-get varr)]
:when (and (models/model? model)
(not (contains? models-to-exclude model-name)))]
model-name)))
(expect
(all-entity-names)
(migrated-entity-names))
(all-model-names)
(migrated-model-names))
......@@ -231,7 +231,7 @@
;; sync one last time
(sync!)
;; now take a look at the Tables in the database related to the view. THERE SHOULD BE ONLY ONE!
(db/select [Table :name :active] :db_id (u/get-id database), :name "angry_birds")))))
(map (partial into {}) (db/select [Table :name :active] :db_id (u/get-id database), :name "angry_birds"))))))
;;; timezone tests
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment