commit 99454bfd299360365cf2933fbc1108f24285add8 Author: Peter Deutsch Date: Tue Jan 31 15:06:27 2023 -0500 Initial fingerprinting lab commit diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..34d97eb --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2022 Mengjia Yan, Joseph Ravichandran, Peter Deutsch, + Weon Taek Na, Jack Cook, Miles Dai, Miguel Gomez-Garcia + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..e4ace86 --- /dev/null +++ b/README.md @@ -0,0 +1,12 @@ +# Website Fingerprinting Lab + +**Learning Objectives** +* Explore a new kind of side channel from a high level language. +* Understand how to reason about the root cause of misleading microarchitectural observations. +* Develop a real-world website fingerprinting attack that works on modern browsers. 
+
+**Description**
+In this lab, students implement the techniques from our group's ISCA 2022 paper `There's Always a Bigger Fish: A Case Study of a Misunderstood Timing Side Channel`. Students will begin by implementing a seemingly familiar cache-based side channel attack in Javascript, and will then be asked to reason about why this attack works. Then, students will remove a core part of the attack, but see that the code still works.
+
+**Setup**
+Students can complete this lab on their own machines. MacOS, Linux, Windows all should work. Google Chrome is required for Part 4 of this lab.
diff --git a/automate.py b/automate.py
new file mode 100644
index 0000000..a25f908
--- /dev/null
+++ b/automate.py
@@ -0,0 +1,143 @@
+import argparse
+import json
+import logging
+import os
+import sys
+import threading
+import time
+
+from flask import Flask, send_from_directory
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+
+def parse_bool(value):
+    """Convert a command-line string such as "true" or "False" to a bool.
+
+    argparse's type=bool calls bool() on the raw string, so every non-empty
+    value -- including "false" -- would enable the flag.
+    """
+    normalized = value.strip().lower()
+    if normalized in ("1", "true", "t", "yes", "y", "on"):
+        return True
+    if normalized in ("0", "false", "f", "no", "n", "off"):
+        return False
+    raise argparse.ArgumentTypeError(f"Expected true or false, got {value!r}.")
+
+# Parse arguments
+parser = argparse.ArgumentParser()
+parser.add_argument("--browser", type=str, choices=["chrome", "firefox", "safari"], default="chrome", help="Browser to run automation in.")
+parser.add_argument("--domains", type=str, default="google.com,youtube.com,baidu.com,facebook.com", help="Comma-separated list of domain names to collect traces from. Defaults to google.com,youtube.com,baidu.com,facebook.com")
+parser.add_argument("--enable_countermeasure", type=parse_bool, nargs="?", const=True, default=False, help="Set to true to enable the countermeasure. Browser must be set to Chrome. Defaults to false.")
+parser.add_argument("--num_traces_per_domain", type=int, default=40, help="Number of traces to collect per domain.")
+parser.add_argument("--trace_length", type=int, default=5000, help="The length of each recorded trace, in milliseconds. Defaults to 5000.")
+
+required = parser.add_argument_group("required arguments")
+required.add_argument("--out_filename", type=str, required=True, help="Name of the output file to save traces to.")
+required.add_argument("--part", type=int, choices=[2, 3, 4], required=True, help="Set to the part of the lab you're working on.")
+
+opts = parser.parse_args()
+
+if opts.browser != "chrome" and opts.enable_countermeasure:
+    print("Browser must be set to Chrome in order to enable the countermeasure.")
+    sys.exit(1)
+
+if os.path.exists(opts.out_filename):
+    print(f"WARNING: Data already exists at {opts.out_filename}. What do you want to do?")
+    res = input("[C]ancel [O]verwrite ").lower()
+
+    if res == "o":
+        os.remove(opts.out_filename)
+    else:
+        sys.exit(1)
+
+# Start serving attacker app
+app = Flask(__name__)
+
+# Disable Flask logs
+os.environ["WERKZEUG_RUN_MAIN"] = "true"
+log = logging.getLogger("werkzeug")
+log.disabled = True
+
+@app.route("/")
+def root():
+    """Serve index.html from the directory of the selected lab part."""
+    return send_from_directory(f"part{opts.part}", "index.html")
+
+# The <path:path> converter supplies the `path` argument and keeps this rule
+# distinct from the "/" rule above.
+@app.route("/<path:path>")
+def static_dir(path):
+    """Serve any other requested file from the selected part's directory."""
+    return send_from_directory(f"part{opts.part}", path)
+
+# Daemon thread: the server must not keep the script alive once traces are saved
+flask_thread = threading.Thread(target=app.run, kwargs={"port": 1234}, daemon=True)
+flask_thread.start()
+
+# Setup
+def get_browser(victim):
+    """Return a new WebDriver session for the browser chosen with --browser.
+
+    The countermeasure extension is loaded only for a Chrome victim, and only
+    when --enable_countermeasure is set.
+    """
+    if opts.browser == "chrome":
+        chrome_opts = Options()
+        chrome_opts.add_experimental_option("excludeSwitches", ["enable-automation"])
+
+        if opts.enable_countermeasure and victim:
+            # Victim has the extension enabled -- attacker does not
+            chrome_opts.add_extension("part4/extension.crx")
+
+        return webdriver.Chrome(options=chrome_opts)
+    elif opts.browser == "firefox":
+        return webdriver.Firefox()
+    elif opts.browser == "safari":
+        return webdriver.Safari()
+
+attacker = get_browser(victim=False)
+attacker.get("http://localhost:1234")
+attacker.execute_script(f"window.trace_length = {opts.trace_length}")
+attacker.execute_script("window.using_automation_script = true")
+
+def collect_trace(url):
+    """Record one trace in the attacker page while a fresh victim browser loads url.
+
+    Returns the last entry of the attacker page's `traces` array.
+    """
+    victim = get_browser(victim=True)
+
+    try:
+        attacker.execute_script("collectTrace()")
+        victim.get(url)
+
+        time.sleep(opts.trace_length / 1000)
+
+        # Poll until the attacker page's `recording` value is falsy
+        while attacker.execute_script("return recording"):
+            time.sleep(0.1)
+    finally:
+        # Close the victim even on failure so browser windows do not pile up
+        victim.quit()
+
+    return attacker.execute_script("return traces")[-1]
+
+# Collect traces
+urls = [f"https://www.{domain}" for domain in opts.domains.split(",")]
+traces = []
+labels = []
+
+try:
+    for url in urls:
+        for _ in range(opts.num_traces_per_domain):
+            traces.append(collect_trace(url))
+            labels.append(url)
+
+    with open(opts.out_filename, "w") as out:
+        json.dump({
+            "traces": traces,
+            "labels": labels
+        }, out, separators=(",", ":"))
+finally:
+    # Always close the attacker browser, even if a trace failed
+    attacker.quit()
diff --git a/index.html b/index.html
new file mode 100644
index 0000000..eff4b48
--- /dev/null
+++ b/index.html
@@ -0,0 +1,185 @@
+ + + + + Website Fingerprinting Lab + + + +

Website Fingerprinting Lab

+
+ + +
+
+ + + + diff --git a/part1/warmup.html b/part1/warmup.html new file mode 100644 index 0000000..304da7e --- /dev/null +++ b/part1/warmup.html @@ -0,0 +1,33 @@ + + + + + Website Fingerprinting Lab + + + +

Website Fingerprinting Lab Warmup

+

+

+ + + diff --git a/part1/warmup.js b/part1/warmup.js new file mode 100644 index 0000000..d7b276b --- /dev/null +++ b/part1/warmup.js @@ -0,0 +1,35 @@ +const runs = 10; + +function measureOneLine() { + const LINE_SIZE = 16; // 64/sizeof(int) + let result = []; + + // Fill with -1 to ensure allocation + const M = new Array(runs * LINE_SIZE).fill(-1); + + for (let i = 0; i < runs; i++) { + const start = performance.now(); + let val = M[i * LINE_SIZE]; + const end = performance.now(); + + result.push(end - start); + } + + return result; +} + +function measureNLines() { + let result = []; + + // TODO: Exercise 2 + + return result; +} + +document.getElementById( + "exercise1-values" +).innerText = `1 Cache Line: [${measureOneLine().join(", ")}]`; + +document.getElementById( + "exercise2-values" +).innerText = `N Cache Lines: [${measureNLines().join(", ")}]`; diff --git a/part2/eval.py b/part2/eval.py new file mode 100644 index 0000000..ceedece --- /dev/null +++ b/part2/eval.py @@ -0,0 +1,27 @@ +import json +import numpy as np + +from sklearn.ensemble import RandomForestClassifier +from sklearn.metrics import classification_report +from sklearn.model_selection import train_test_split + +def eval(): + y_pred_full, y_test_full = [], [] + + # Re-train 10 times in order to reduce effects of randomness + for i in range(10): + ### TODO: Exercise 5 + ### 1. Load data from traces file + ### 2. Split data into X_train, X_test, y_train, y_test with train_test_split + ### 3. Train classifier with X_train and y_train + ### 4. Use classifier to make predictions on X_test. Save the result to a variable called y_pred + + # Do not modify the next two lines + y_test_full.extend(y_test) + y_pred_full.extend(y_pred) + + ### TODO: Exercise 5 (continued) + ### 5. 
Print classification report using y_test_full and y_pred_full + +if __name__ == "__main__": + eval() diff --git a/part2/index.html b/part2/index.html new file mode 120000 index 0000000..79c5d6f --- /dev/null +++ b/part2/index.html @@ -0,0 +1 @@ +../index.html \ No newline at end of file diff --git a/part2/worker.js b/part2/worker.js new file mode 100644 index 0000000..44820da --- /dev/null +++ b/part2/worker.js @@ -0,0 +1,32 @@ +// Duration of your trace, in milliseconds +let TRACE_LENGTH; + +// Array of length TRACE_LENGTH with your trace's values +let T; + +// Value of performance.now() when you started recording your trace +let start; + +function record() { + // Create empty array for saving trace values + T = new Array(TRACE_LENGTH); + + // Fill array with -1 so we can be sure memory is allocated + T.fill(-1, 0, T.length); + + // Save start timestamp + start = performance.now(); + + // TODO (Exercise 3): Record data for TRACE_LENGTH milliseconds and save values to T. + + // Once done recording, send result to main thread + postMessage(JSON.stringify(T)); +} + +// DO NOT MODIFY BELOW THIS LINE -- PROVIDED BY COURSE STAFF +self.onmessage = (e) => { + if (e.data.type === "start") { + TRACE_LENGTH = e.data.trace_length; + setTimeout(record, 0); + } +}; diff --git a/part3/eval.py b/part3/eval.py new file mode 100644 index 0000000..ceedece --- /dev/null +++ b/part3/eval.py @@ -0,0 +1,27 @@ +import json +import numpy as np + +from sklearn.ensemble import RandomForestClassifier +from sklearn.metrics import classification_report +from sklearn.model_selection import train_test_split + +def eval(): + y_pred_full, y_test_full = [], [] + + # Re-train 10 times in order to reduce effects of randomness + for i in range(10): + ### TODO: Exercise 5 + ### 1. Load data from traces file + ### 2. Split data into X_train, X_test, y_train, y_test with train_test_split + ### 3. Train classifier with X_train and y_train + ### 4. Use classifier to make predictions on X_test. 
Save the result to a variable called y_pred + + # Do not modify the next two lines + y_test_full.extend(y_test) + y_pred_full.extend(y_pred) + + ### TODO: Exercise 5 (continued) + ### 5. Print classification report using y_test_full and y_pred_full + +if __name__ == "__main__": + eval() diff --git a/part3/index.html b/part3/index.html new file mode 120000 index 0000000..79c5d6f --- /dev/null +++ b/part3/index.html @@ -0,0 +1 @@ +../index.html \ No newline at end of file diff --git a/part3/worker.js b/part3/worker.js new file mode 100644 index 0000000..ef43d59 --- /dev/null +++ b/part3/worker.js @@ -0,0 +1,32 @@ +// Duration of your trace, in milliseconds +let TRACE_LENGTH; + +// Array of length TRACE_LENGTH with your trace's values +let T; + +// Value of performance.now() when you started recording your trace +let start; + +function record() { + // Create empty array for saving trace values + T = new Array(TRACE_LENGTH); + + // Fill array with -1 so we can be sure memory is allocated + T.fill(-1, 0, T.length); + + // Save start timestamp + start = performance.now(); + + // TODO (Exercise 7): Record data for TRACE_LENGTH milliseconds and save values to T. + + // Once done recording, send result to main thread + postMessage(JSON.stringify(T)); +} + +// DO NOT MODIFY BELOW THIS LINE -- PROVIDED BY COURSE STAFF +self.onmessage = (e) => { + if (e.data.type === "start") { + TRACE_LENGTH = e.data.trace_length; + setTimeout(record, 0); + } +}; diff --git a/part4/eval.py b/part4/eval.py new file mode 100644 index 0000000..e9369e7 --- /dev/null +++ b/part4/eval.py @@ -0,0 +1,27 @@ +import json +import numpy as np + +from sklearn.ensemble import RandomForestClassifier +from sklearn.metrics import classification_report +from sklearn.model_selection import train_test_split + +def eval(): + y_pred_full, y_test_full = [], [] + + # Re-train 10 times in order to reduce effects of randomness + for i in range(10): + ### TODO: Exercise 5 + ### 1. Load data from traces file + ### 2. 
Split data into X_train, X_test, y_train, y_test with train_test_split
+        ### 3. Train classifier with X_train and y_train
+        ### 4. Use classifier to make predictions on X_test. Save the result to a variable called y_pred
+
+        # Do not modify the next two lines
+        y_test_full.extend(y_test)
+        y_pred_full.extend(y_pred)
+
+    ### TODO: Exercise 5 (continued)
+    ### 5. Print classification report using y_test_full and y_pred_full
+
+if __name__ == "__main__":
+    eval()
diff --git a/part4/extension.crx b/part4/extension.crx
new file mode 100644
index 0000000..c857b6d
Binary files /dev/null and b/part4/extension.crx differ
diff --git a/part4/extension/background.js b/part4/extension/background.js
new file mode 100644
index 0000000..e85606f
--- /dev/null
+++ b/part4/extension/background.js
@@ -0,0 +1,174 @@
+const domains = [
+  "https://www.google.com/",
+  "https://www.youtube.com/",
+  "https://www.tmall.com/",
+  "https://www.qq.com/",
+  "https://www.baidu.com/",
+  "https://www.sohu.com/",
+  "https://www.facebook.com/",
+  "https://www.taobao.com/",
+  "https://www.jd.com/",
+  "https://www.amazon.com/",
+  "https://www.yahoo.com/",
+  "https://www.wikipedia.org/",
+  "https://www.weibo.com/",
+  "https://sina.com.cn/",
+  "https://www.zoom.us/",
+  "http://www.xinhuanet.com/",
+  "https://www.live.com/",
+  "https://www.reddit.com/",
+  "https://www.netflix.com/",
+  "https://www.microsoft.com/",
+  "https://www.instagram.com/",
+  "https://www.office.com/",
+  "https://panda.tv/",
+  "https://www.zhanqi.tv/",
+  "https://www.alipay.com/",
+  "https://www.bing.com/",
+  "https://www.csdn.net/",
+  "https://www.vk.com/",
+  "https://www.myshopify.com/",
+  "https://www.naver.com/",
+  "https://www.okezone.com/",
+  "https://www.twitch.tv/",
+  "https://www.twitter.com/",
+  "https://www.ebay.com/",
+  "https://www.adobe.com/",
+  "https://www.tianya.cn/",
+  "https://www.huanqiu.com/",
+  "https://www.yy.com/",
+  "https://www.aliexpress.com/",
+  "https://www.linkedin.com/",
+  "https://www.force.com/",
+  "https://www.aparat.com/",
+  "https://www.mail.ru/",
+  "https://www.msn.com/",
+  "https://www.dropbox.com/",
+  "https://www.whatsapp.com/",
+  "https://www.apple.com/",
+  "https://www.1688.com/",
+  "https://www.wordpress.com/",
+  "https://www.canva.com/",
+  "https://www.indeed.com/",
+  "https://www.stackoverflow.com/",
+  "https://www.ok.ru/",
+  "https://www.so.com/",
+  "https://www.chase.com/",
+  "https://www.imdb.com/",
+  "https://www.slack.com/",
+  "https://www.etsy.com/",
+  "https://www.tiktok.com/",
+  "https://www.booking.com/",
+  "https://www.babytree.com/",
+  "https://rakuten.co.jp/",
+  "https://www.salesforce.com/",
+  "https://www.spotify.com/",
+  "https://www.tribunnews.com/",
+  "https://www.fandom.com/",
+  "https://www.tradingview.com/",
+  "https://www.github.com/",
+  "https://www.haosou.com/",
+  "https://www.paypal.com/",
+  "https://www.cnblogs.com/",
+  "https://www.alibaba.com/",
+  "https://www.kompas.com/",
+  "https://gome.com.cn/",
+  "https://www.walmart.com/",
+  "https://www.roblox.com/",
+  "https://www.6.cn/",
+  "https://www.zillow.com/",
+  "https://www.godaddy.com/",
+  "https://www.imgur.com/",
+  "https://www.espn.com/",
+  "https://www.bbc.com/",
+  "https://www.hao123.com/",
+  "https://www.pikiran-rakyat.com/",
+  "https://www.grammarly.com/",
+  "https://www.cnn.com/",
+  "https://www.telegram.org/",
+  "https://www.tumblr.com/",
+  "https://www.nytimes.com/",
+  "https://www.detik.com/",
+  "https://www.wetransfer.com/",
+  "https://www.savefrom.net/",
+  "https://www.rednet.cn/",
+  "https://www.freepik.com/",
+  "https://www.ilovepdf.com/",
+  "https://www.daum.net/",
+  "https://www.pinterest.com/",
+  "https://www.primevideo.com/",
+  "https://www.intuit.com/",
+  "https://www.medium.com/",
+];
+
+const loadTime = 5000;
+let loading = false;
+let startTime = 0;
+
+// Send one cache-busted request to a random domain from the list above.
+// The request is aborted once loadTime ms have passed since startTime.
+function randomPing() {
+  const controller = new AbortController();
+  const abortTimer = setTimeout(
+    () => controller.abort(),
+    loadTime - (performance.now() - startTime)
+  );
+
+  fetch(
+    domains[Math.floor(Math.random() * domains.length)] +
+      "?" +
+      new Date().getTime(),
+    {
+      signal: controller.signal,
+    }
+  )
+    // Aborted and failed requests are expected; swallow them so they do not
+    // surface as unhandled promise rejections.
+    .catch(() => {})
+    // Once the request has settled the pending abort timer is no longer needed.
+    .finally(() => clearTimeout(abortTimer));
+}
+
+// Randomly either busy-loop for about 5 ms or send one random request.
+function activityBurst() {
+  switch (Math.floor(Math.random() * 2)) {
+    case 0: {
+      // Braces scope the let declarations to this case only.
+      let start = performance.now();
+      let counter = 0;
+
+      while (performance.now() - start < 5) {
+        counter += 1;
+      }
+
+      console.log(counter);
+      break;
+    }
+    case 1:
+      randomPing();
+      break;
+  }
+}
+
+// On a "loading" update while idle, send 20 requests and schedule random
+// activity across the next loadTime ms; a "complete" update re-arms the listener.
+chrome.tabs.onUpdated.addListener((tabId, changeInfo, tab) => {
+  if (changeInfo.status === "loading") {
+    if (loading) {
+      return;
+    }
+
+    startTime = performance.now();
+    loading = true;
+
+    for (let i = 0; i < 20; i++) {
+      randomPing();
+    }
+
+    for (let i = 0; i < loadTime / 10; i++) {
+      setTimeout(activityBurst, Math.random() * loadTime);
+    }
+  } else if (changeInfo.status === "complete") {
+    loading = false;
+  }
+});
diff --git a/part4/extension/manifest.json b/part4/extension/manifest.json
new file mode 100644
index 0000000..cd43bd6
--- /dev/null
+++ b/part4/extension/manifest.json
@@ -0,0 +1,10 @@
+{
+  "name": "Lab Countermeasure",
+  "version": "1.0",
+  "manifest_version": 3,
+  "background": {
+    "service_worker": "background.js"
+  },
+  "action": {},
+  "host_permissions": ["*://*/*"]
+}
diff --git a/part4/index.html b/part4/index.html
new file mode 120000
index 0000000..79c5d6f
--- /dev/null
+++ b/part4/index.html
@@ -0,0 +1 @@
+../index.html
\ No newline at end of file
diff --git a/part4/worker.js b/part4/worker.js
new file mode 100644
index 0000000..8b73c39
--- /dev/null
+++ b/part4/worker.js
@@ -0,0 +1,33 @@
+// Duration of your trace, in milliseconds
+let TRACE_LENGTH;
+
+// Array of length TRACE_LENGTH with your trace's values
+let T;
+
+// Value of performance.now() when you started recording your trace
+let start;
+
+function record() {
+  // Create empty array for saving trace values
+  T = new Array(TRACE_LENGTH);
+
+  // Fill array with -1 so we can be sure memory is allocated
+  T.fill(-1, 0, T.length);
+
+  // Save start timestamp
+  start =
performance.now();
+
+  // TODO (Exercise 8): Copy your solution from part 3
+  // TODO (Exercise 9): Optionally make changes to your part 3 solution if you need to
+
+  // Once done recording, send result to main thread
+  postMessage(JSON.stringify(T));
+}
+
+// DO NOT MODIFY BELOW THIS LINE -- PROVIDED BY COURSE STAFF
+self.onmessage = (e) => {
+  if (e.data.type === "start") {
+    TRACE_LENGTH = e.data.trace_length;
+    setTimeout(record, 0);
+  }
+};
diff --git a/update.sh b/update.sh
new file mode 100644
index 0000000..3de44f5
--- /dev/null
+++ b/update.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+# Updates repository to latest starter code
+#
+# Adapted from Oliver Beckstein's ASU-CompMethodsPhysics-PHY494 course 2016-2020 placed into the public domain
+
+# With GitHub template repositories one needs to use --allow-unrelated-histories
+# at least once. https://help.github.com/en/github/creating-cloning-and-archiving-repositories/creating-a-repository-from-a-template
+
+progname="$0"
+REMOTE_NAME="startercode"
+REMOTE_URL="https://github.com/CSAIL-Arch-Sec/SHD-WebsiteFingerprintingLab.git"
+
+# progname, from top dir
+UPDATESH="./deploy/$(basename "$progname")"
+
+CONTACT_MESSAGE="Contact the instructor and TA with a screen shot of ALL output from running $0."
+
+# Print "ERROR: <msg>." and exit with the status given as $2 (default 1).
+function die () {
+    local msg="$1" err=${2:-1}
+    echo "ERROR: ${msg}."
+    exit "${err}"
+}
+
+# ensure everything relative to top dir
+topdir="$(git rev-parse --show-toplevel)" || die "Failed to get rootdir"
+cd "${topdir}" || die "Failed to get to the git root dir ${topdir}"
+
+
+# first time
+# 1. set remote repo
+# 2. merge histories between student (template) and remote skeleton
+
+if ! git remote get-url "${REMOTE_NAME}" >/dev/null 2>&1; then
+    echo "Adding remote repository '${REMOTE_NAME}'."
+    git remote add "${REMOTE_NAME}" "${REMOTE_URL}"
+
+    echo "Merging histories for the first time..."
+    set -x
+    git pull --allow-unrelated-histories -s recursive -X theirs --no-edit "${REMOTE_NAME}" main || {
+        # Save the pull's exit status now: the cleanup commands below overwrite $?
+        status=$?
+        # Abort only if a merge is actually in progress, then undo the remote we added
+        git rev-list -1 MERGE_HEAD >/dev/null 2>&1 && git merge --abort
+        git remote rm "${REMOTE_NAME}"
+        die "Failed to merge histories. ${CONTACT_MESSAGE}" "${status}"
+    }
+
+    set +x
+fi
+
+echo "updating repository... git pull from ${REMOTE_NAME}"
+git pull --no-edit "${REMOTE_NAME}" main || die "Failed to pull from ${REMOTE_NAME}. ${CONTACT_MESSAGE}"