From eb04c931a6ae2de24c4021911cb5a21a19b80a39 Mon Sep 17 00:00:00 2001
From: Cam Saul <>
Date: Tue, 4 Sep 2018 15:32:44 -0700
Subject: [PATCH] Fix uberjar caching

 .circleci/config.yml           |  28 ++--
 .circleci/ |   7 +
 .gitignore                     |   1 +
 bin/build-for-test             |  43 -----
 bin/ci                         | 282 ---------------------------------
 5 files changed, 25 insertions(+), 336 deletions(-)
 delete mode 100755 bin/build-for-test
 delete mode 100755 bin/ci

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 18726592dd1..e64f346ac1e 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -13,12 +13,6 @@ restore-fe-deps-cache: &restore-fe-deps-cache
     - fe-deps-{{ checksum "yarn.lock" }}
     - fe-deps-
-restore-uberjar-cache: &restore-uberjar-cache
-  keys:
-    - uberjar-{{ .Branch }}-{{ .Revision }}
-    - uberjar-{{ .Branch }}
-    - uberjar-
 version: 2.1
@@ -39,6 +33,15 @@ jobs:
           key: source-{{ .Branch }}-{{ .Revision }}
             - .git
+      # The basic idea here is to generate a file with checksums for all the backend source files, and save it as
+      # `./backend-checksums.txt`. Then we'll use the checksum of that file for uberjar caching; thus we can reuse
+      # the same uberjar for integration tests across any build where the backend files are the same
+      - run:
+          name: Generate checksums of all backend source files to use as Uberjar cache key
+          # Hash every input of a backend-only uberjar (src, resources, project.clj); '>' overwrites so reruns are stable
+          command: >
+            find ./src ./resources ./project.clj -type f -print0 | LC_ALL=C sort -z
+            | xargs -0 md5sum > backend-checksums.txt
       - persist_to_workspace:
           root: /home/circleci/
@@ -428,13 +431,15 @@ jobs:
       - restore_cache:
           <<: *restore-be-deps-cache
       - restore_cache:
-          <<: *restore-uberjar-cache
+          keys:
+            - uberjar-{{ checksum "./backend-checksums.txt" }}
       - run:
-          name: Build uberjar
-          command: ./bin/build-for-test
+          name: Build uberjar if needed
+          command: >
+            if [ ! -f './target/uberjar/metabase.jar' ]; then ./bin/build version uberjar; fi
           no_output_timeout: 5m
       - save_cache:
-          key: uberjar-{{ .Branch }}-{{ .Revision }}
+          key: uberjar-{{ checksum "./backend-checksums.txt" }}
             - /home/circleci/metabase/metabase/target/uberjar/metabase.jar
@@ -446,7 +451,8 @@ jobs:
       - restore_cache:
           <<: *restore-fe-deps-cache
       - restore_cache:
-          <<: *restore-uberjar-cache
+          keys:
+            - uberjar-{{ checksum "./backend-checksums.txt" }}
       - run:
           name: Generate version file
           command: ./bin/build version
diff --git a/.circleci/ b/.circleci/
index d998d27469f..d0ee1ff9fdd 100755
--- a/.circleci/
+++ b/.circleci/
@@ -1,5 +1,12 @@
 #!/usr/bin/env bash
+# Determines whether we should skip tests for a driver, usage:
+#    ./.circleci/ oracle
+# Returns false if the commit message contains [ci all], [ci drivers], or [ci <driver-name>],
+# or if the current branch is master or a release branch.
 set -eu
 COMMIT_MESSAGE=`git log -1 --oneline`
diff --git a/.gitignore b/.gitignore
index f82f949b170..70e402a9dfd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,3 +54,4 @@ coverage-summary.json
diff --git a/bin/build-for-test b/bin/build-for-test
deleted file mode 100755
index d6f3dd6d0d3..00000000000
--- a/bin/build-for-test
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/env bash
-set -eu
-source-hash() {
-  # hash all the files that might change a backend-only uberjar build (for integration tests)
-  (
-    find src project.clj resources/ -type f -print0 | xargs -0 shasum ;
-    find resources -type f \( -iname \*.clj -o -iname \*.edn -o -iname \*.yaml -o -iname \*.properties \) -not -name "" -print0 | xargs -0 shasum ;
-  ) | shasum | awk '{ print $1 }'
-uberjar-hash() {
-  # java -jar target/uberjar/metabase.jar version | grep -oE 'source_hash [a-f0-9]+' | awk '{ print $2 }'
-  # pulling the directly from the jar is much faster
-  unzip -c target/uberjar/metabase.jar | grep "$VERSION_PROPERTY_NAME" | cut -f2 -d=
-check-uberjar-hash() {
-  expected_hash=$(source-hash)
-  actual_hash=$(uberjar-hash)
-  if [ "$expected_hash" == "$actual_hash" ]; then
-    return 0
-  else
-    return 1
-  fi
-build-uberjar-for-test() {
-  echo "$VERSION_PROPERTY_NAME=$(source-hash)" >> resources/
-  ./bin/build uberjar
-./bin/build version
-if [ ! -f "target/uberjar/metabase.jar" ] || ! check-uberjar-hash; then
-  echo "Building uberjar for testing"
-  build-uberjar-for-test
-  echo "Uberjar already up to date for testing"
diff --git a/bin/ci b/bin/ci
deleted file mode 100755
index 91d024cecde..00000000000
--- a/bin/ci
+++ /dev/null
@@ -1,282 +0,0 @@
-#!/usr/bin/env bash
-# this ensures any failures along the way result in a CI failure
-set -eu
-node-0() {
-    is_enabled "drivers" && export ENGINES="h2,mongo,mysql,bigquery,sparksql" || export ENGINES="h2"
-    if is_engine_enabled "mongo"; then
-        run_step install-mongodb
-    fi
-    if is_engine_enabled "sparksql"; then
-        run_step install-sparksql
-    fi
-    MB_MYSQL_TEST_USER=ubuntu run_step lein-test
-node-1() {
-    run_step lein with-profile +ci docstring-checker
-    is_enabled "drivers" && export ENGINES="h2,sqlserver,oracle" || export ENGINES="h2"
-    if is_engine_enabled "oracle"; then
-        run_step install-oracle
-    fi
-    MB_DB_TYPE=postgres MB_DB_DBNAME=circle_test MB_DB_PORT=5432 MB_DB_USER=ubuntu MB_DB_HOST=localhost \
-        run_step lein-test
-node-2() {
-    run_step lein with-profile +ci bikeshed
-    is_enabled "drivers" && export ENGINES="h2,postgres,sqlite,presto" || export ENGINES="h2"
-    if is_engine_enabled "crate"; then
-        run_step install-crate
-    fi
-    if is_engine_enabled "presto"; then
-        run_step install-presto
-    fi
-    MB_ENCRYPTION_SECRET_KEY='Orw0AAyzkO/kPTLJRxiyKoBHXa/d6ZcO+p+gpZO/wSQ=' MB_DB_TYPE=mysql MB_DB_DBNAME=circle_test MB_DB_PORT=3306 MB_DB_USER=ubuntu MB_DB_HOST=localhost \
-        run_step lein-test
-node-3() {
-    run_step yarn run lint
-    run_step yarn run flow
-    is_enabled "drivers" && export ENGINES="h2,redshift,druid,vertica" || export ENGINES="h2"
-    if is_engine_enabled "vertica"; then
-        run_step install-vertica
-    fi
-    # this is redundant with node 0 unless one of the non-H2 driver tests is enabled
-    if [ ENGINES != "h2" ]; then
-        run_step lein-test
-    fi
-node-4() {
-    run_step ./bin/reflection-linter
-    run_step ./bin/build version frontend sample-dataset uberjar
-    report-frontend-size
-    report-uberjar-size
-node-5() {
-    run_step lein with-profile +ci eastwood
-    run_step yarn run test-karma
-    run_step yarn run test-unit --coverage
-    report-frontend-coverage
-node-6() {
-    run_step ./bin/build-for-test
-    run_step check-uberjar-file-count
-    run_step yarn run test-integrated-no-build
-report() {
-  timestamp="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
-  name="$1"
-  value="$2"
-  if ! [ -z ${STATS_DB+x} ]; then
-    psql "$STATS_DB" -c "INSERT INTO build_stats (timestamp, name, value, build_number, node_index, branch, hash) VALUES ('$timestamp', '$name', $value, $CIRCLE_BUILD_NUM, $CIRCLE_NODE_INDEX, '$CIRCLE_BRANCH', '$CIRCLE_SHA1');" > /dev/null
-  fi
-report-frontend-coverage() {
-  report "frontend-coverage-lines" $(node -e "console.log(require('./coverage-summary.json').total.lines.pct)")
-  report "frontend-coverage-functions" $(node -e "console.log(require('./coverage-summary.json').total.functions.pct)")
-  report "frontend-coverage-branches" $(node -e "console.log(require('./coverage-summary.json').total.branches.pct)")
-  report "frontend-coverage-statements" $(node -e "console.log(require('./coverage-summary.json').total.statements.pct)")
-  report "frontend-loc" $(node -e "console.log(require('./coverage-summary.json')")
-report-frontend-size() {
-  report "frontend-size" "$(wc -c < resources/frontend_client/app/dist/app-main.bundle.js)"
-report-uberjar-size() {
-  report "uberjar-size" "$(wc -c < target/uberjar/metabase.jar)"
-install-crate() {
-    sudo add-apt-repository ppa:crate/stable -y
-    sudo apt-get update
-    sudo apt-get install -y crate
-    # ulimit setting refused Crate service to start on CircleCI container - so comment it
-    sudo sed -i '/MAX_LOCKED_MEMORY/s/^/#/' /etc/init/crate.conf
-    echo "psql.port: 5200" | sudo tee -a /etc/crate/crate.yml
-    sudo service crate restart
-install-mongodb() {
-    sudo apt-get purge mongodb-org*
-    sudo apt-key adv --keyserver hkp:// --recv 7F0CEB10
-    echo "deb precise/mongodb-org/3.0 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-3.0.list
-    sudo apt-get update
-    sudo apt-get install -y mongodb-org
-    sudo service mongod restart
-install-oracle() {
-    wget --output-document=plugins/ojdbc7.jar $ORACLE_JDBC_JAR
-install-vertica() {
-    wget --output-document=plugins/vertica-jdbc-7.1.2-0.jar $VERTICA_JDBC_JAR
-    docker run --detach --publish 5433:5433 sumitchawla/vertica
-    sleep 60
-install-presto() {
-    docker run --detach --publish 8080:8080 metabase/presto-mb-ci
-    sleep 10
-install-sparksql() {
-    # first, download the Spark Deps JAR and put it in the plugins/ dir
-    wget --output-document=plugins/spark-deps.jar
-    # next, download Spark and run it
-    spark_version='2.1.1' # Java 7 support was removed in Spark 2.2 so don't upgrade until we upgrade CI
-    hadoop_version='2.7'
-    spark_archive="spark-${spark_version}-bin-hadoop${hadoop_version}.tgz"
-    wget --progress dot -e dotbytes=250K "${spark_version}/${spark_archive}"
-    tar -xf $spark_archive
-    rm $spark_archive
-    spark_dir="$(pwd)/spark-${spark_version}-bin-hadoop${hadoop_version}"
-    java -Duser.timezone=Etc/UTC \
-         -Xmx512m \
-         -cp "${spark_dir}/conf:${spark_dir}/jars/*" \
-         org.apache.spark.deploy.SparkSubmit \
-         --master local[8] \
-         --conf spark.executor.extraJavaOptions=-Duser.timezone=Etc/UTC \
-         --conf spark.cores.max=1 \
-         --class org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 \
-         --name "Thrift JDBC/ODBC Server" \
-         --executor-memory 512m \
-         spark-internal &>/dev/null &
-lein-test() {
-    lein with-profile +ci test
-if [ -z ${CIRCLE_BRANCH_REGEX+x} ]; then
-    CIRCLE_BRANCH_REGEX='^master|release-.+$'
-is_enabled() {
-    (echo "$CIRCLE_BRANCH" | grep -qE "$CIRCLE_BRANCH_REGEX") ||
-    [[ "$CIRCLE_COMMIT_MESSAGE" == *"[ci $1]"* ]] ||
-    [[ "$CIRCLE_COMMIT_MESSAGE" == *"[ci all]"* ]]
-is_engine_enabled() {
-    [[ "$ENGINES" == *"$1"* ]]
-# Make sure uberjar has less than 64k files because that is the Java 7 LIMIT
-check-uberjar-file-count() {
-    if [ ! -f ./target/uberjar/metabase.jar ]; then
-        echo "Missing uberjar."
-        exit  1
-    fi
-    file_count=$(unzip -l target/uberjar/metabase.jar | wc -l)
-    echo "Uberjar has ${file_count} files."
-    if [ $file_count -gt 65535 ]; then
-        echo "Uberjar exceeds the 64k Java 7 file limit! We can't allow this. ¡Lo siento!"
-        exit 1
-    fi
-# print a summary on exit
-# records the time and exit code of each step
-run_step() {
-    status=0
-    start=$(date +%s)
-    # run in the background then `wait` so fail_fast can interrupt it
-    "$@" &
-    wait $! || status=$?
-    elapsed=$(expr $(date +%s) - $start || true)
-    summary="${summary}status=$status time=$elapsed command=$@\n"
-    report "run-status \"$*\"" "$status"
-    report "run-time \"$*\"" "$elapsed"
-    return $status
-summary() {
-    # if last status was failure then fail the rest of the nodes
-    if [ $status != 0 ]; then
-      fail_fast
-    fi
-    echo -e "========================================"
-    echo -en "$summary"
-    echo -e "========================================"
-trap summary EXIT
-fail_fast() {
-  if [ -z ${CIRCLE_NODE_TOTAL+x} ]; then
-    return 0
-  fi
-  echo -e "========================================"
-  echo -e "Failing fast! Stopping other nodes..."
-  # Touch a file to differentiate between a local failure and a
-  # failure triggered by another node
-  touch '/tmp/local-fail'
-  # ssh to the other CircleCI nodes and send SIGUSR1 to tell them to exit early
-  for (( i = 0; i < $CIRCLE_NODE_TOTAL; i++ )); do
-    if [ $i != $CIRCLE_NODE_INDEX ]; then
-      ssh node$i 'touch /tmp/fail; pkill -SIGUSR1 -f "bash ./bin/ci"' 2> /dev/null || true
-    fi
-  done
-exit_early() {
-  echo -e "========================================"
-  echo -e "Exited early! Did not necesssarily pass!"
-  pkill -TERM -P $$ || true
-  exit 0
-trap exit_early SIGUSR1
-if [ -z ${CIRCLE_BUILD_NUM+x} ]; then
-    export CIRCLE_BUILD_NUM="-1"
-if [ -z ${CIRCLE_SHA1+x} ]; then
-    export CIRCLE_SHA1="$(git rev-parse HEAD)"
-if [ -z ${CIRCLE_BRANCH+x} ]; then
-    export CIRCLE_BRANCH="$(git rev-parse --abbrev-ref HEAD)"
-export CIRCLE_COMMIT_MESSAGE="$(git log --format=oneline -n 1 $CIRCLE_SHA1)"
-# This local-fail check is to guard against two nodes failing at the
-# same time. Both nodes ssh to each node and drop /tmp/fail. Those
-# failing nodes then get here and see and the other node has told it
-# to exit early. This results in both nodes exiting early, and thus
-# not failing, causing the build to succeed
-if [[ -f "/tmp/fail" && ! -f "/tmp/local-fail" ]]; then
-  exit_early
-if [ -z ${CIRCLE_NODE_INDEX+x} ]; then
-    # If CIRCLE_NODE_INDEX isn't set, read node numbers from the args
-    # Useful for testing locally.
-    for i in "$@"; do
-      export CIRCLE_NODE_INDEX="$i"
-      node-$i
-    done
-    # Normal mode on CircleCI