Initial fingerprinting lab commit

This commit is contained in:
Peter Deutsch 2023-01-31 15:06:27 -05:00
commit 99454bfd29
No known key found for this signature in database
GPG Key ID: 6E1951D8B6025E7C
19 changed files with 799 additions and 0 deletions

22
LICENSE Normal file
View File

@ -0,0 +1,22 @@
MIT License
Copyright (c) 2022 Mengjia Yan, Joseph Ravichandran, Peter Deutsch,
Weon Taek Na, Jack Cook, Miles Dai, Miguel Gomez-Garcia
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

12
README.md Normal file
View File

@ -0,0 +1,12 @@
# Website Fingerprinting Lab
**Learning Objectives**
* Explore a new kind of side channel from a high level language.
* Understand how to reason about the root cause of misleading microarchitectural observations.
* Develop a real-world website fingerprinting attack that works on modern browsers.
**Description**
In this lab, students implement the techniques from our group's ISCA 2022 paper `There's Always a Bigger Fish: A Case Study of a Misunderstood Timing Side Channel`. Students will begin by implementing a seemingly familiar cache-based side channel attack in JavaScript, and will then be asked to reason about why this attack works. Then, students will remove a core part of the attack, but see that the code still works.
**Setup**
Students can complete this lab on their own machines. macOS, Linux, and Windows should all work. Google Chrome is required for Part 4 of this lab.

111
automate.py Normal file
View File

@ -0,0 +1,111 @@
import argparse
import json
import logging
import os
import sys
import threading
import time
from flask import Flask, send_from_directory
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Parse arguments
def _str_to_bool(value):
    """Interpret a command-line string as a boolean.

    ``type=bool`` cannot be used with argparse for this purpose: ``bool`` of
    any non-empty string is True, so ``--enable_countermeasure false`` would
    silently enable the countermeasure.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ("true", "t", "yes", "y", "1"):
        return True
    # The empty string is kept as False to match the old bool("") result.
    if lowered in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(f"Expected a boolean value, got {value!r}.")


parser = argparse.ArgumentParser()
parser.add_argument("--browser", type=str, choices=["chrome", "firefox", "safari"], default="chrome", help="Browser to run automation in.")
parser.add_argument("--domains", type=str, default="google.com,youtube.com,baidu.com,facebook.com", help="Comma-separated list of domain names to collect traces from. Defaults to google.com,youtube.com,baidu.com,facebook.com")
parser.add_argument("--enable_countermeasure", type=_str_to_bool, default=False, help="Set to true to enable the countermeasure. Browser must be set to Chrome. Defaults to false.")
parser.add_argument("--num_traces_per_domain", type=int, default=40, help="Number of traces to collect per domain.")
parser.add_argument("--trace_length", type=int, default=5000, help="The length of each recorded trace, in milliseconds. Defaults to 5000.")

required = parser.add_argument_group("required arguments")
required.add_argument("--out_filename", type=str, required=True, help="Name of the output file to save traces to.")
required.add_argument("--part", type=int, choices=[2, 3, 4], required=True, help="Set to the part of the lab you're working on.")

opts = parser.parse_args()

# The countermeasure is shipped as a Chrome extension, so it cannot be
# combined with any other browser.
if opts.browser != "chrome" and opts.enable_countermeasure:
    print("Browser must be set to Chrome in order to enable the countermeasure.")
    sys.exit(1)

# Never clobber existing trace data without asking first.
if os.path.exists(opts.out_filename):
    print(f"WARNING: Data already exists at {opts.out_filename}. What do you want to do?")
    res = input("[C]ancel [O]verwrite ").lower()

    if res == "o":
        os.remove(opts.out_filename)
    else:
        sys.exit(1)
# Start serving attacker app
app = Flask(__name__)

# Disable Flask logs
os.environ["WERKZEUG_RUN_MAIN"] = "true"
log = logging.getLogger("werkzeug")
log.disabled = True


@app.route("/")
def root():
    """Serve index.html from the directory of the selected lab part."""
    return send_from_directory(f"part{opts.part}", "index.html")


@app.route("/<path:path>")
def static_dir(path):
    """Serve any other file from the directory of the selected lab part."""
    return send_from_directory(f"part{opts.part}", path)


# Run the server on a daemon thread so it does not keep the process alive
# once trace collection is done. The ``daemon`` keyword replaces the
# deprecated ``Thread.setDaemon()`` call.
flask_thread = threading.Thread(target=app.run, kwargs={"port": 1234}, daemon=True)
flask_thread.start()
# Setup
def get_browser(victim):
    """Create a WebDriver instance for the browser chosen with ``--browser``.

    Args:
        victim: True when the driver will load the victim site. Only the
            victim's Chrome instance gets the countermeasure extension.

    Returns:
        A new Selenium driver, or None if ``opts.browser`` is not one of the
        three supported names.
    """
    selected = opts.browser

    if selected == "firefox":
        return webdriver.Firefox()
    if selected == "safari":
        return webdriver.Safari()
    if selected != "chrome":
        return None

    chrome_opts = Options()
    chrome_opts.add_experimental_option("excludeSwitches", ["enable-automation"])

    # Victim has the extension enabled -- attacker does not
    load_extension = opts.enable_countermeasure and victim
    if load_extension:
        chrome_opts.add_extension("part4/extension.crx")

    return webdriver.Chrome(options=chrome_opts)
# Open the attacker page and tell it that the automation script is driving it.
attacker = get_browser(victim=False)
attacker.get("http://localhost:1234")
for page_setup_statement in (
    f"window.trace_length = {opts.trace_length}",
    "window.using_automation_script = true",
):
    attacker.execute_script(page_setup_statement)
def collect_trace(url):
    """Record one trace in the attacker page while a victim browser loads ``url``.

    A fresh victim browser is started for every trace and is always closed
    again, even if navigation or polling raises, so failed runs do not leave
    stray browser processes behind.

    Args:
        url: Address the victim browser should load.

    Returns:
        The most recent entry of the attacker page's ``traces`` array.
    """
    victim = get_browser(victim=True)

    try:
        attacker.execute_script("collectTrace()")
        victim.get(url)

        # Wait out the nominal trace duration, then poll until the page
        # reports that recording has finished.
        time.sleep(float(opts.trace_length) / 1000)

        while attacker.execute_script("return recording"):
            time.sleep(0.1)
    finally:
        victim.quit()

    return attacker.execute_script("return traces")[-1]
# Collect traces
# Ignore stray whitespace and empty entries in --domains (for example
# "a.com, b.com,") so they do not turn into malformed URLs.
urls = [
    f"https://www.{domain.strip()}"
    for domain in opts.domains.split(",")
    if domain.strip()
]
traces = []
labels = []

try:
    for url in urls:
        for _ in range(opts.num_traces_per_domain):
            traces.append(collect_trace(url))
            labels.append(url)

    # traces[i] was recorded while loading labels[i].
    with open(opts.out_filename, "w") as out:
        json.dump({
            "traces": traces,
            "labels": labels
        }, out, separators=(",", ":"))
finally:
    # Always close the attacker browser, including when collection fails.
    attacker.quit()

185
index.html Normal file
View File

@ -0,0 +1,185 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<title>Website Fingerprinting Lab</title>
<style>
* {
margin: 0;
padding: 0;
}
body {
font-family: Arial, Helvetica, sans-serif;
padding: 64px;
}
h1 {
font-size: 32px;
margin-bottom: 16px;
}
p {
margin-bottom: 8px;
}
#buttons {
margin-bottom: 16px;
}
button {
background: #fff;
border: 2px solid #3498db;
border-radius: 4px;
color: #3498db;
cursor: pointer;
display: inline-block;
font-size: 16px;
margin-right: 16px;
padding: 8px 16px;
}
button.disabled {
background: #ccc;
border-color: #ccc;
color: #666;
cursor: default;
}
.trace {
margin-bottom: 16px;
}
</style>
</head>
<body>
<h1>Website Fingerprinting Lab</h1>
<div id="buttons">
<button id="collect-trace">Collect trace</button>
<button id="download-traces">Download traces</button>
</div>
<div id="traces"></div>
<script src="https://d3js.org/d3.v6.js"></script>
<script type="text/javascript">
// Worker that performs the actual recording.
const worker = new Worker("worker.js");

const [collectTraceButton, downloadTracesButton] = [
  "collect-trace",
  "download-traces",
].map((elementId) => document.getElementById(elementId));

// Default values for when the automation script isn't being used. When
// the script is in use, these values will get overwritten.
Object.assign(window, {
  trace_length: 5000,
  using_automation_script: false,
  recording: false,
  traces: [],
});

// Element ids of the trace <div>s that have been added to the page.
let traceIds = [];
// Runs when the worker posts a finished trace: stores it in window.traces
// and, unless the automation script is driving the page, draws it and
// recolours every trace already on screen.
worker.onmessage = (e) => {
  window.recording = false;

  const trace = JSON.parse(e.data);
  window.traces.push(trace);

  if (window.using_automation_script) {
    // Don't display traces when automation script is in use
    return;
  }

  // Create new trace div
  const parent = document.getElementById("traces");
  const div = document.createElement("div");
  const traceId = "a" + Math.random().toString().substring(2, 10);
  div.setAttribute("id", traceId);
  div.className = "trace";
  parent.appendChild(div);
  traceIds.push(traceId);

  // Trace dimensions
  const width = parent.getBoundingClientRect().width;
  const height = 64;

  // Create the SVG for the new trace and bind its samples to the element so
  // the redraw pass below can find the data for every SVG on the page.
  d3.select("#" + traceId)
    .append("svg")
    .attr("width", width)
    .attr("height", height)
    .datum(trace);

  // Find largest value across all traces
  const maxVal = d3.max(window.traces, (d) => d3.max(d));

  const color = d3
    .scaleQuantize()
    .range(["#0d0887", "#7e03a8", "#cc4778", "#f89540", "#f0f921"])
    .domain([0, maxVal]);

  // Re-visualize all traces each time in case maxVal changes. Each SVG is
  // redrawn from its own bound samples; drawing every trace into the newest
  // SVG would leave older traces on a stale colour scale and stack hidden
  // rectangles on top of each other.
  d3.select(parent)
    .selectAll("svg")
    .each(function (samples) {
      // Use the width the SVG was created with, not the current page width.
      const svgWidth = Number(this.getAttribute("width"));
      const x = d3
        .scaleLinear()
        .domain([0, samples.length])
        .range([0, svgWidth]);

      d3.select(this)
        .selectAll("rect")
        .data(samples)
        .join("rect")
        .attr("x", (value, index) => x(index))
        .attr("y", 0)
        .attr("width", x(1))
        .attr("height", height)
        .style("fill", (value) => color(value));
    });

  // Reset UI
  collectTraceButton.innerText = "Collect trace";
  collectTraceButton.className = "";
};
// Marks the page as recording and asks the worker to start a trace of
// window.trace_length. Also invoked by name from the automation script.
function collectTrace() {
  const startRequest = {
    type: "start",
    trace_length: window.trace_length,
  };

  window.recording = true;
  collectTraceButton.className = "disabled";
  collectTraceButton.innerText = "Collecting trace...";

  worker.postMessage(startRequest);
}
// Manual collection: show a three-second countdown on the button, then
// start recording. Clicks are ignored while a recording is pending or running.
collectTraceButton.onclick = () => {
  if (window.recording) return;

  window.recording = true;
  collectTraceButton.className = "disabled";

  // Shows the remaining seconds and re-arms itself once per second;
  // starts the recording when the count reaches zero.
  const countdown = (secondsLeft) => {
    if (secondsLeft === 0) {
      collectTrace();
      return;
    }

    collectTraceButton.innerText = `Starting in ${secondsLeft}...`;
    setTimeout(() => countdown(secondsLeft - 1), 1000);
  };

  countdown(3);
};
// Offers every trace collected so far as a "traces.json" download.
downloadTracesButton.onclick = () => {
  const blob = new Blob([JSON.stringify({ traces: window.traces })], {
    type: "application/json",
  });
  const url = URL.createObjectURL(blob);

  // A temporary link is the usual way to trigger a named download from
  // script; it is removed again right after the click.
  const elem = document.createElement("a");
  elem.href = url;
  elem.download = "traces.json";
  document.body.appendChild(elem);
  elem.click();
  document.body.removeChild(elem);

  // Release the blob once the download has been handed to the browser;
  // otherwise every click keeps another copy of all traces alive. The
  // revocation is deferred so it cannot race with the click being handled.
  setTimeout(() => URL.revokeObjectURL(url), 0);
};
</script>
</body>
</html>

33
part1/warmup.html Normal file
View File

@ -0,0 +1,33 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<title>Website Fingerprinting Lab</title>
<style>
* {
margin: 0;
padding: 0;
}
body {
font-family: Arial, Helvetica, sans-serif;
padding: 64px;
}
h1 {
font-size: 32px;
margin-bottom: 16px;
}
p {
margin-bottom: 8px;
}
</style>
</head>
<body>
<h1>Website Fingerprinting Lab Warmup</h1>
<p id="exercise1-values"></p>
<p id="exercise2-values"></p>
<script src="warmup.js"></script>
</body>
</html>

35
part1/warmup.js Normal file
View File

@ -0,0 +1,35 @@
const runs = 10;
// Times `runs` single array reads, each LINE_SIZE elements apart, and
// returns the elapsed performance.now() difference for every read.
function measureOneLine() {
  const LINE_SIZE = 16; // 64/sizeof(int)

  // Fill with -1 to ensure allocation
  const M = new Array(runs * LINE_SIZE).fill(-1);

  const timings = [];
  for (let line = 0; line < runs; line += 1) {
    const before = performance.now();
    let val = M[line * LINE_SIZE]; // the access being timed
    const after = performance.now();

    timings.push(after - before);
  }

  return timings;
}
// Placeholder for Exercise 2. Until it is implemented this returns an empty
// array, which the page prints as "N Cache Lines: []".
function measureNLines() {
let result = [];
// TODO: Exercise 2
return result;
}
// Run both measurements and print their results into the matching <p> tags.
for (const [elementId, label, measure] of [
  ["exercise1-values", "1 Cache Line", measureOneLine],
  ["exercise2-values", "N Cache Lines", measureNLines],
]) {
  document.getElementById(elementId).innerText = `${label}: [${measure()
    .join(", ")}]`;
}

27
part2/eval.py Normal file
View File

@ -0,0 +1,27 @@
import json
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
# Skeleton for the classifier evaluation exercise: accumulates the true and
# predicted labels of 10 train/test rounds so one report can cover all of them.
# NOTE(review): the function name shadows the builtin eval() in this module.
# NOTE: y_test and y_pred are not defined until the TODO steps below are
# implemented, so running this skeleton unchanged raises NameError.
def eval():
y_pred_full, y_test_full = [], []
# Re-train 10 times in order to reduce effects of randomness
for i in range(10):
### TODO: Exercise 5
### 1. Load data from traces file
### 2. Split data into X_train, X_test, y_train, y_test with train_test_split
### 3. Train classifier with X_train and y_train
### 4. Use classifier to make predictions on X_test. Save the result to a variable called y_pred
# Do not modify the next two lines
y_test_full.extend(y_test)
y_pred_full.extend(y_pred)
### TODO: Exercise 5 (continued)
### 5. Print classification report using y_test_full and y_pred_full
# Run the evaluation only when this file is executed as a script.
if __name__ == "__main__":
eval()

1
part2/index.html Symbolic link
View File

@ -0,0 +1 @@
../index.html

32
part2/worker.js Normal file
View File

@ -0,0 +1,32 @@
// Duration of your trace, in milliseconds
let TRACE_LENGTH;
// Array of length TRACE_LENGTH with your trace's values
let T;
// Value of performance.now() when you started recording your trace
let start;
// Records one trace into T and posts it, JSON-encoded, to the main thread.
// As provided, T is posted still filled with -1 because the recording step
// is left as an exercise.
function record() {
// Create empty array for saving trace values
T = new Array(TRACE_LENGTH);
// Fill array with -1 so we can be sure memory is allocated
T.fill(-1, 0, T.length);
// Save start timestamp
start = performance.now();
// TODO (Exercise 3): Record data for TRACE_LENGTH milliseconds and save values to T.
// Once done recording, send result to main thread
postMessage(JSON.stringify(T));
}
// DO NOT MODIFY BELOW THIS LINE -- PROVIDED BY COURSE STAFF
// On a { type: "start", trace_length } message from the page, store the
// requested trace length and schedule record() with a zero-delay timer.
self.onmessage = (e) => {
if (e.data.type === "start") {
TRACE_LENGTH = e.data.trace_length;
setTimeout(record, 0);
}
};

27
part3/eval.py Normal file
View File

@ -0,0 +1,27 @@
import json
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
# Skeleton for the classifier evaluation exercise: accumulates the true and
# predicted labels of 10 train/test rounds so one report can cover all of them.
# NOTE(review): the function name shadows the builtin eval() in this module.
# NOTE: y_test and y_pred are not defined until the TODO steps below are
# implemented, so running this skeleton unchanged raises NameError.
def eval():
y_pred_full, y_test_full = [], []
# Re-train 10 times in order to reduce effects of randomness
for i in range(10):
### TODO: Exercise 5
### 1. Load data from traces file
### 2. Split data into X_train, X_test, y_train, y_test with train_test_split
### 3. Train classifier with X_train and y_train
### 4. Use classifier to make predictions on X_test. Save the result to a variable called y_pred
# Do not modify the next two lines
y_test_full.extend(y_test)
y_pred_full.extend(y_pred)
### TODO: Exercise 5 (continued)
### 5. Print classification report using y_test_full and y_pred_full
# Run the evaluation only when this file is executed as a script.
if __name__ == "__main__":
eval()

1
part3/index.html Symbolic link
View File

@ -0,0 +1 @@
../index.html

32
part3/worker.js Normal file
View File

@ -0,0 +1,32 @@
// Duration of your trace, in milliseconds
let TRACE_LENGTH;
// Array of length TRACE_LENGTH with your trace's values
let T;
// Value of performance.now() when you started recording your trace
let start;
// Records one trace into T and posts it, JSON-encoded, to the main thread.
// As provided, T is posted still filled with -1 because the recording step
// is left as an exercise.
function record() {
// Create empty array for saving trace values
T = new Array(TRACE_LENGTH);
// Fill array with -1 so we can be sure memory is allocated
T.fill(-1, 0, T.length);
// Save start timestamp
start = performance.now();
// TODO (Exercise 7): Record data for TRACE_LENGTH milliseconds and save values to T.
// Once done recording, send result to main thread
postMessage(JSON.stringify(T));
}
// DO NOT MODIFY BELOW THIS LINE -- PROVIDED BY COURSE STAFF
// On a { type: "start", trace_length } message from the page, store the
// requested trace length and schedule record() with a zero-delay timer.
self.onmessage = (e) => {
if (e.data.type === "start") {
TRACE_LENGTH = e.data.trace_length;
setTimeout(record, 0);
}
};

27
part4/eval.py Normal file
View File

@ -0,0 +1,27 @@
import json
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
# Skeleton for the classifier evaluation exercise: accumulates the true and
# predicted labels of 10 train/test rounds so one report can cover all of them.
# NOTE(review): the function name shadows the builtin eval() in this module.
# NOTE: y_test and y_pred are not defined until the TODO steps below are
# implemented, so running this skeleton unchanged raises NameError.
def eval():
y_pred_full, y_test_full = [], []
# Re-train 10 times in order to reduce effects of randomness
for i in range(10):
### TODO: Exercise 5
### 1. Load data from traces file
### 2. Split data into X_train, X_test, y_train, y_test with train_test_split
### 3. Train classifier with X_train and y_train
### 4. Use classifier to make predictions on X_test. Save the result to a variable called y_pred
# Do not modify the next two lines
y_test_full.extend(y_test)
y_pred_full.extend(y_pred)
### TODO: Exercise 5 (continued)
### 5. Print classification report using y_test_full and y_pred_full
# Run the evaluation only when this file is executed as a script.
if __name__ == "__main__":
eval()

BIN
part4/extension.crx Normal file

Binary file not shown.

View File

@ -0,0 +1,162 @@
const domains = [
"https://www.google.com/",
"https://www.youtube.com/",
"https://www.tmall.com/",
"https://www.qq.com/",
"https://www.baidu.com/",
"https://www.sohu.com/",
"https://www.facebook.com/",
"https://www.taobao.com/",
"https://www.jd.com/",
"https://www.amazon.com/",
"https://www.yahoo.com/",
"https://www.wikipedia.org/",
"https://www.weibo.com/",
"https://sina.com.cn/",
"https://www.zoom.us/",
"http://www.xinhuanet.com/",
"https://www.live.com/",
"https://www.reddit.com/",
"https://www.netflix.com/",
"https://www.microsoft.com/",
"https://www.instagram.com/",
"https://www.office.com/",
"https://panda.tv/",
"https://www.zhanqi.tv/",
"https://www.alipay.com/",
"https://www.bing.com/",
"https://www.csdn.net/",
"https://www.vk.com/",
"https://www.myshopify.com/",
"https://www.naver.com/",
"https://www.okezone.com/",
"https://www.twitch.tv/",
"https://www.twitter.com/",
"https://www.ebay.com/",
"https://www.adobe.com/",
"https://www.tianya.cn/",
"https://www.huanqiu.com/",
"https://www.yy.com/",
"https://www.aliexpress.com/",
"https://www.linkedin.com/",
"https://www.force.com/",
"https://www.aparat.com/",
"https://www.mail.ru/",
"https://www.msn.com/",
"https://www.dropbox.com/",
"https://www.whatsapp.com/",
"https://www.apple.com/",
"https://www.1688.com/",
"https://www.wordpress.com/",
"https://www.canva.com/",
"https://www.indeed.com/",
"https://www.stackoverflow.com/",
"https://www.ok.ru/",
"https://www.so.com/",
"https://www.chase.com/",
"https://www.imdb.com/",
"https://www.slack.com/",
"https://www.etsy.com/",
"https://www.tiktok.com/",
"https://www.booking.com/",
"https://www.babytree.com/",
"https://rakuten.co.jp/",
"https://www.salesforce.com/",
"https://www.spotify.com/",
"https://www.tribunnews.com/",
"https://www.fandom.com/",
"https://www.tradingview.com/",
"https://www.github.com/",
"https://www.haosou.com/",
"https://www.paypal.com/",
"https://www.cnblogs.com/",
"https://www.alibaba.com/",
"https://www.kompas.com/",
"https://gome.com.cn/",
"https://www.walmart.com/",
"https://www.roblox.com/",
"https://www.6.cn/",
"https://www.zillow.com/",
"https://www.godaddy.com/",
"https://www.imgur.com/",
"https://www.espn.com/",
"https://www.bbc.com/",
"https://www.hao123.com/",
"https://www.pikiran-rakyat.com/",
"https://www.grammarly.com/",
"https://www.cnn.com/",
"https://www.telegram.org/",
"https://www.tumblr.com/",
"https://www.nytimes.com/",
"https://www.detik.com/",
"https://www.wetransfer.com/",
"https://www.savefrom.net/",
"https://www.rednet.cn/",
"https://www.freepik.com/",
"https://www.ilovepdf.com/",
"https://www.daum.net/",
"https://www.pinterest.com/",
"https://www.primevideo.com/",
"https://www.intuit.com/",
"https://www.medium.com/",
];
// Length of the noise window, in milliseconds: bounds the random delay of
// each activity burst and the abort deadline of each ping.
const loadTime = 5000;
// True from the first "loading" tab update until a "complete" update.
let loading = false;
// performance.now() value captured when the current noise window started.
let startTime = 0;
// Sends one request to a randomly chosen domain from the list. The request
// is aborted once the current noise window (loadTime after startTime) ends.
function randomPing() {
  const controller = new AbortController();
  const timeoutId = setTimeout(
    () => controller.abort(),
    loadTime - (performance.now() - startTime)
  );

  // The timestamp query string makes every request URL unique.
  const target =
    domains[Math.floor(Math.random() * domains.length)] +
    "?" +
    new Date().getTime();

  fetch(target, {
    signal: controller.signal,
  })
    .catch(() => {
      // The response is never used. Aborts and network errors are expected
      // here and must not surface as unhandled promise rejections.
    })
    .finally(() => {
      // Drop the abort timer once the request has settled on its own.
      clearTimeout(timeoutId);
    });
}
// Produces one burst of noise, picked at random with equal probability:
// either a 5 ms busy loop or a single random ping.
function activityBurst() {
  const choice = Math.floor(Math.random() * 2);

  if (choice === 1) {
    randomPing();
    return;
  }

  if (choice === 0) {
    const burstStart = performance.now();
    let counter = 0;
    while (performance.now() - burstStart < 5) {
      counter += 1;
    }
    console.log(counter);
  }
}
// Starts a noise window when a tab begins loading: 20 immediate pings plus
// loadTime / 10 activity bursts at random delays within loadTime. Further
// "loading" updates are ignored until a "complete" update clears the flag.
chrome.tabs.onUpdated.addListener((tabId, changeInfo, tab) => {
  const { status } = changeInfo;

  if (status === "complete") {
    loading = false;
    return;
  }

  if (status !== "loading" || loading) {
    return;
  }

  startTime = performance.now();
  loading = true;

  for (let ping = 0; ping < 20; ping += 1) {
    randomPing();
  }

  const burstCount = loadTime / 10;
  for (let burst = 0; burst < burstCount; burst += 1) {
    setTimeout(activityBurst, Math.random() * loadTime);
  }
});

View File

@ -0,0 +1,10 @@
{
"name": "Lab Countermeasure",
"version": "1.0",
"manifest_version": 3,
"background": {
"service_worker": "background.js"
},
"action": {},
"host_permissions": ["*://*/*"]
}

1
part4/index.html Symbolic link
View File

@ -0,0 +1 @@
../index.html

33
part4/worker.js Normal file
View File

@ -0,0 +1,33 @@
// Duration of your trace, in milliseconds
let TRACE_LENGTH;
// Array of length TRACE_LENGTH with your trace's values
let T;
// Value of performance.now() when you started recording your trace
let start;
// Records one trace into T and posts it, JSON-encoded, to the main thread.
// As provided, T is posted still filled with -1 because the recording step
// is left as an exercise.
function record() {
// Create empty array for saving trace values
T = new Array(TRACE_LENGTH);
// Fill array with -1 so we can be sure memory is allocated
T.fill(-1, 0, T.length);
// Save start timestamp
start = performance.now();
// TODO (Exercise 8): Copy your solution from part 3
// TODO (Exercise 9): Optionally make changes to your part 3 solution if you need to
// Once done recording, send result to main thread
postMessage(JSON.stringify(T));
}
// DO NOT MODIFY BELOW THIS LINE -- PROVIDED BY COURSE STAFF
// On a { type: "start", trace_length } message from the page, store the
// requested trace length and schedule record() with a zero-delay timer.
self.onmessage = (e) => {
if (e.data.type === "start") {
TRACE_LENGTH = e.data.trace_length;
setTimeout(record, 0);
}
};

48
update.sh Normal file
View File

@ -0,0 +1,48 @@
#!/bin/bash
# Updates repository to latest starter code
#
# Adapted from Oliver Beckstein's ASU-CompMethodsPhysics-PHY494 course 2016-2020 placed into the public domain
# With GitHub template repositories one needs to use --allow-unrelated-histories
# at least once. https://help.github.com/en/github/creating-cloning-and-archiving-repositories/creating-a-repository-from-a-template
# Path this script was invoked with.
progname="$0"
# Name and location of the remote that holds the starter code.
REMOTE_NAME="startercode"
REMOTE_URL="https://github.com/CSAIL-Arch-Sec/SHD-WebsiteFingerprintingLab.git"
# progname, from top dir
# (quoted so that a script path containing spaces does not break basename)
UPDATESH="./deploy/$(basename "$progname")"
CONTACT_MESSAGE="Contact the instructor and TA with a screen shot of ALL output from running $0."
# Print "ERROR: <message>." and terminate the script.
#   $1  message text
#   $2  exit status (defaults to 1 when missing or empty)
function die () {
    local msg="$1"
    local err="${2:-1}"
    printf 'ERROR: %s.\n' "${msg}"
    exit "${err}"
}
# ensure everything relative to top dir
topdir="$(git rev-parse --show-toplevel)" || die "Failed to get rootdir"
# The message must name ${topdir}; the variable ${rootdir} is never set.
cd "${topdir}" || die "Failed to get to the git root dir ${topdir}"
# first time
# 1. set remote repo
# 2. merge histories between student (template) and remote skeleton
if ! git remote get-url "${REMOTE_NAME}" >/dev/null 2>&1; then
    echo "Adding remote repository '${REMOTE_NAME}'."
    git remote add "${REMOTE_NAME}" "${REMOTE_URL}"

    echo "Merging histories for the first time..."
    set -x
    git pull --allow-unrelated-histories -s recursive -X theirs --no-edit "${REMOTE_NAME}" main || {
        # Save the pull's status before the cleanup commands overwrite $?.
        # Passing the status of "git remote rm" (normally 0) to die would
        # make the script exit successfully after a failed merge.
        pull_status=$?
        # Only abort if a merge is actually in progress.
        git rev-list -1 MERGE_HEAD >/dev/null 2>&1 && git merge --abort
        # Remove the remote again so the next run retries the first-time path.
        git remote rm "${REMOTE_NAME}"
        die "Failed to merge histories. ${CONTACT_MESSAGE}" "${pull_status}"
    }
    set +x
fi
# Regular update: fetch and merge the latest starter code.
echo "updating repository... git pull from ${REMOTE_NAME}"
if ! git pull --no-edit "${REMOTE_NAME}" main; then
    die "Failed to pull from ${REMOTE_NAME}. ${CONTACT_MESSAGE}"
fi