// SHD-CacheAttackLab/Part1-Timing/utility.h
//
// 149 lines
// 4.4 KiB
// C

#include <assert.h>
#include <inttypes.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
// Include guard: everything up to the matching #endif is compiled at most
// once per translation unit.
// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation in C; consider renaming the guard if nothing else
// depends on this exact name.
#ifndef __UTILITY_H__
#define __UTILITY_H__
// Number of elements in each latency array handled by the printing functions
// in this header: they sort and index exactly SAMPLES entries per array.
#define SAMPLES 10
// Functions to read the time stamp counter, which is called tsc for short
// "rdtscpp" returns the low 32 bits of the counter as a 32-bit unsigned integer
// "rdtscpp64" returns the full counter as a 64-bit unsigned integer
// Details in https://www.felixcloutier.com/x86/rdtscp
// Returns the low 32 bits (EAX) of the time stamp counter read by RDTSCP.
// The value wraps around, so compare two readings with unsigned subtraction.
static inline uint32_t rdtscpp(void) {
    uint32_t rv;
    // The instruction mnemonic is "rdtscp". It also overwrites EDX (high half
    // of the counter) and ECX, so both are declared as clobbers.
    // __asm__ is used instead of asm so the header also compiles in strict
    // ISO modes (e.g. -std=c11), where the asm keyword is disabled.
    __asm__ volatile ("rdtscp" : "=a" (rv) : : "edx", "ecx");
    return rv;
}
// Returns the full 64-bit time stamp counter read by RDTSCP.
static inline uint64_t rdtscpp64(void) {
    uint32_t low, high;
    // RDTSCP delivers the counter as EDX:EAX and also overwrites ECX, which
    // is therefore declared as a clobber. The mnemonic is "rdtscp"; __asm__
    // keeps the header usable in strict ISO modes such as -std=c11.
    __asm__ volatile ("rdtscp" : "=a" (low), "=d" (high) : : "ecx");
    // Widen before shifting so the high half is not lost in 32-bit arithmetic.
    return ((uint64_t)high << 32) | low;
}
// Function "lfence" wraps the assembly instruction lfence
// This instruction performs a serializing operation which ensures that
// the instructions after "lfence" start execution only after
// all the instructions before "lfence" complete
// Details in https://www.felixcloutier.com/x86/lfence
// Issues an LFENCE instruction.
// Takes no arguments and returns nothing.
static inline void lfence(void) {
    // The "memory" clobber tells the compiler not to move or cache memory
    // accesses across this statement; without it only the CPU, not the
    // compiler, would respect the fence.
    __asm__ volatile ("lfence" : : : "memory");
}
// Here is an example of using "rdtscp" together with "mfence" and "lfence" to
// measure the time it takes to access a block specified by its virtual address
// The corresponding pseudo code is
// =========
// mfence; lfence
// t1 = rdtscp
// load addr
// t2 = rdtscp
// cycles = t2 - t1
// return cycles
// =========
// Times a single 8-byte load from the virtual address `addr`.
//
// addr:    address to load from; it must be valid to read 8 bytes there.
// returns: t2 - t1, computed on the low 32 bits of the two RDTSCP readings
//          and zero-extended to 64 bits (so the result is always < 2^32).
static inline uint64_t measure_one_block_access_time(uint64_t addr)
{
    uint64_t cycles;
    __asm__ volatile ("mov %1, %%r8\n\t"        // r8 = addr
                      "mfence\n\t"              // fence before timing starts
                      "lfence\n\t"
                      "rdtscp\n\t"              // t1 in eax
                      "mov %%eax, %%edi\n\t"    // save t1
                      "mov (%%r8), %%r8\n\t"    // the timed load
                      "rdtscp\n\t"              // t2 in eax
                      "sub %%edi, %%eax\n\t"    // eax = t2 - t1
                      : "=a" (cycles)           /*output*/
                      : "r" (addr)              /*input*/
                      // RDTSCP overwrites RDX and RCX in addition to RAX, so
                      // they must be listed or the compiler may keep live
                      // values in them. "cc" covers the flags changed by sub;
                      // "memory" keeps surrounding memory accesses from being
                      // moved across the measurement.
                      : "r8", "rdi", "rcx", "rdx", "cc", "memory");
    return cycles;
}
// Performs one 8-byte load from the virtual address `addr`.
//
// addr:    address to load from; it must be valid to read 8 bytes there.
// returns: the 64-bit value that was loaded. The function is declared to
//          return uint64_t, so falling off the end without a return would be
//          undefined behaviour for any caller that uses the result.
static inline uint64_t one_block_access(uint64_t addr)
{
    uint64_t value;
    // volatile keeps the load even when the caller ignores the result;
    // "memory" stops the compiler from reordering other memory accesses
    // around it.
    __asm__ volatile ("mov (%1), %0\n\t"
                      : "=r" (value) /*output*/
                      : "r" (addr)   /*input*/
                      : "memory");
    return value;
}
// A wrapper function of the clflush instruction
// The instruction evicts the cache line containing the given address from the
// cache hierarchy, so that the next time the line is accessed, it will be
// fetched from DRAM
// Details in https://www.felixcloutier.com/x86/clflush
// Executes CLFLUSH on the address `v`.
//
// v: any address inside the cache line to flush. The data itself is not
//    modified.
static inline void clflush(void *v) {
    // "memory" makes the compiler finish pending stores before the flush and
    // prevents it from moving later memory accesses ahead of it.
    __asm__ volatile ("clflush 0(%0)" : : "r" (v) : "memory");
}
// Supporting functions for printing results in different formats
// Function "compare" is used in the printing functions and you do not need it
// qsort() comparison callback that orders uint64_t values ascending.
//
// p1, p2:  pointers to the two uint64_t elements being compared.
// returns: negative if *p1 < *p2, zero if equal, positive if *p1 > *p2.
int compare(const void *p1, const void *p2) {
    const uint64_t u1 = *(const uint64_t *)p1;
    const uint64_t u2 = *(const uint64_t *)p2;
    // Compare directly instead of subtracting truncated ints: narrowing to
    // int loses the upper bits (giving the wrong order for large values) and
    // the subtraction itself can overflow.
    return (u1 > u2) - (u1 < u2);
}
// Print out the latencies you measured
// Prints a summary table (minimum, bottom decile, median, top decile,
// maximum) of the measured latencies to stdout.
//
// dram, l1, l2, l3: arrays of SAMPLES latencies each. Every array is sorted
//                   in place in ascending order, so the caller's data is
//                   reordered by this call.
void print_results(uint64_t* dram, uint64_t* l1, uint64_t* l2, uint64_t* l3) {
    // Positions of the reported order statistics in a sorted array.
    const size_t lowest = 0;
    const size_t bottom = SAMPLES / 10;
    const size_t median = SAMPLES / 2;
    const size_t top = (SAMPLES * 9) / 10;
    const size_t highest = SAMPLES - 1;

    qsort(dram, SAMPLES, sizeof(uint64_t), compare);
    qsort(l1, SAMPLES, sizeof(uint64_t), compare);
    qsort(l2, SAMPLES, sizeof(uint64_t), compare);
    qsort(l3, SAMPLES, sizeof(uint64_t), compare);

    // PRIu64 is the conversion that matches uint64_t on every platform;
    // "%ld" expects a signed long.
    printf(" : L1 L2 L3 Mem \n");
    printf("Minimum : %5" PRIu64 " %5" PRIu64 " %5" PRIu64 " %5" PRIu64 "\n",
           l1[lowest], l2[lowest], l3[lowest], dram[lowest]);
    printf("Bottom decile: %5" PRIu64 " %5" PRIu64 " %5" PRIu64 " %5" PRIu64 "\n",
           l1[bottom], l2[bottom], l3[bottom], dram[bottom]);
    printf("Median : %5" PRIu64 " %5" PRIu64 " %5" PRIu64 " %5" PRIu64 "\n",
           l1[median], l2[median], l3[median], dram[median]);
    printf("Top decile : %5" PRIu64 " %5" PRIu64 " %5" PRIu64 " %5" PRIu64 "\n",
           l1[top], l2[top], l3[top], dram[top]);
    printf("Maximum : %5" PRIu64 " %5" PRIu64 " %5" PRIu64 " %5" PRIu64 "\n",
           l1[highest], l2[highest], l3[highest], dram[highest]);
}
// Format the latencies for part 1.5
// Prints the sorted latencies as four lines of space-separated integers, in
// the order L1, L2, L3, DRAM. Every value is followed by a single space and
// every line ends with a newline.
//
// dram, l1, l2, l3: arrays of SAMPLES latencies each. Every array is sorted
//                   in place in ascending order, so the caller's data is
//                   reordered by this call.
void print_results_for_python(uint64_t* dram, uint64_t* l1, uint64_t* l2, uint64_t* l3)
{
    qsort(dram, SAMPLES, sizeof(uint64_t), compare);
    qsort(l1, SAMPLES, sizeof(uint64_t), compare);
    qsort(l2, SAMPLES, sizeof(uint64_t), compare);
    qsort(l3, SAMPLES, sizeof(uint64_t), compare);

    // Output order of the rows.
    const uint64_t *const rows[] = { l1, l2, l3, dram };
    const size_t row_count = sizeof rows / sizeof rows[0];

    for (size_t r = 0; r < row_count; r++) {
        for (size_t i = 0; i < SAMPLES; i++) {
            // PRIu64 matches uint64_t on every platform; "%ld" expects a
            // signed long.
            printf("%" PRIu64 " ", rows[r][i]);
        }
        printf("\n");
    }
}
#endif // __UTILITY_H__