{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib\n",
"import numpy as np\n",
"from mpmath import mp\n",
"import matplotlib.pyplot as plt\n",
"from mpl_toolkits.axes_grid1 import host_subplot\n",
"import mpl_toolkits.axisartist as AA\n",
"import math\n",
"import warnings\n",
"from ipywidgets import interact, interactive, fixed, interact_manual\n",
"plt.rcParams['figure.dpi'] = 180\n",
"plt.rcParams['figure.figsize'] = [12.0, 8.0]\n",
"plt.rcParams['text.latex.unicode'] = True\n",
"plt.rcParams['text.usetex'] = True\n",
"plt.rcParams['mathtext.fontset'] = 'stix'\n",
"plt.rcParams['font.family'] = 'STIXGeneral'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"log_sizes = np.array([i for i in range(1,35)])\n",
"sizes = np.array([2**i for i in range(1,35)])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Register file size is speculative\n",
"# Defaults are for AWS EC2 c5.2xlarge, an \"Intel(R) Xeon(R) Platinum 8124M CPU @ 3.00GHz\"\n",
"def init_plot(reg_file=128 * 8, l1=32*1024, l2=1024**2, l3=24 * 1024**2, ram=16 * 1024**3, disk=256 * 1024**3):\n",
" plt.xlabel('Size $\\\\left[\\\\log_2 n\\\\right]$')\n",
" plt.xscale('log')\n",
" plt.xticks(sizes, [str(n) for n in log_sizes])\n",
" plt.gca().xaxis.set_minor_locator(plt.NullLocator())\n",
" \n",
" def mem_line(size, label):\n",
" plt.axvline(size / 32, color='grey', linestyle='--')\n",
" plt.text(size / 32, 100, label)\n",
" \n",
" mem_line(reg_file, \"REG\")\n",
" mem_line(l1, \"L1\")\n",
" mem_line(l2, \"L2\")\n",
" mem_line(l3, \"L3\")\n",
" mem_line(ram, \"RAM\")\n",
" mem_line(disk, \"DISK\")\n",
"\n",
" plt.yscale('log')\n",
" plt.ylabel('Time $\\\\left[\\\\mathtt{sec}\\\\right]$')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def series(data, color='tab:blue'):\n",
" plt.plot(sizes[:len(data)], data, color=color, marker='.')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Benchmark c5.2xlarge"
]
},
{
"cell_type": "markdown",
"metadata": {
"toc-hr-collapsed": true,
"toc-nb-collapsed": true
},
"source": [
"On AWS EC2 instance type `c5.2xlarge`. Root drive size increased to 256GB and a 64GB swapfile is added. Using `--allocation heap`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fft = np.fromstring(\"\"\"\n",
"1\t0.00000531\n",
"2\t0.000004477\n",
"3\t0.000007344\n",
"4\t0.000009439\n",
"5\t0.000013781\n",
"6\t0.000023936\n",
"7\t0.000042144\n",
"8\t0.000077292\n",
"9\t0.000149075\n",
"10\t0.000309063\n",
"11\t0.000690227\n",
"12\t0.001507601\n",
"13\t0.00330798\n",
"14\t0.00717378\n",
"15\t0.002854655\n",
"16\t0.005344267\n",
"17\t0.010803624\n",
"18\t0.022976793\n",
"19\t0.046437339\n",
"20\t0.097819326\n",
"21\t0.203701168\n",
"22\t0.424807287\n",
"23\t0.907349772\n",
"24\t1.858657419\n",
"25\t4.064888369\n",
"26\t8.003252785\n",
"27\t17.67193235\n",
"28\t34.221835982\n",
"\"\"\", sep=' ').reshape((-1, 2))[:,1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fft_sqrt = np.fromstring(\"\"\"\n",
"1\t0.000008935\n",
"2\t0.000038013\n",
"3\t0.000023374\n",
"4\t0.000026747\n",
"5\t0.000029887\n",
"6\t0.00004039\n",
"7\t0.000048373\n",
"8\t0.000072872\n",
"9\t0.000123616\n",
"10\t0.000250539\n",
"11\t0.000507387\n",
"12\t0.001089775\n",
"13\t0.002354191\n",
"14\t0.005096965\n",
"15\t0.010595209\n",
"16\t0.022575648\n",
"17\t0.0473638\n",
"18\t0.100595503\n",
"19\t0.211823046\n",
"20\t0.446282983\n",
"21\t0.938822169\n",
"22\t1.9707616190000001\n",
"23\t4.142269804\n",
"24\t8.607713068\n",
"25\t18.171836223\n",
"26\t37.403999458\n",
"\"\"\", sep=' ').reshape((-1, 2))[:,1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fft_rec = np.fromstring(\"\"\"\n",
"1\t0.000006159\n",
"2\t0.000004487\n",
"3\t0.000006596\n",
"4\t0.000008758\n",
"5\t0.000011726\n",
"6\t0.000026389\n",
"7\t0.000039159\n",
"8\t0.000073156\n",
"9\t0.000148793\n",
"10\t0.00030885\n",
"11\t0.000692644\n",
"12\t0.001506197\n",
"13\t0.003307426\n",
"14\t0.00717706\n",
"15\t0.015458752\n",
"16\t0.033280211\n",
"17\t0.071393263\n",
"18\t0.152663299\n",
"19\t0.325223417\n",
"20\t0.691438377\n",
"21\t1.455103663\n",
"22\t3.058479201\n",
"23\t6.4278282\n",
"24\t13.515026047\n",
"\"\"\", sep=' ').reshape((-1, 2))[:,1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"init_plot()\n",
"series(fft, 'tab:red')\n",
"series(fft_sqrt, 'tab:orange')\n",
"series(fft_rec, 'tab:blue')"
]
},
{
"cell_type": "markdown",
"metadata": {
"toc-hr-collapsed": true,
"toc-nb-collapsed": true
},
"source": [
"## Benchmark c5.2xlarge (64MiB RAM)"
]
},
{
"cell_type": "markdown",
"metadata": {
"toc-hr-collapsed": true,
"toc-nb-collapsed": true
},
"source": [
"On AWS EC2 instance type `c5.2xlarge`. Root drive size increased to 256GiB and a 64GiB swapfile is added.\n",
"\n",
"```\n",
"L1 Instruction-Cache: (32 KiB, 8-way associativity, direct-mapped)\n",
"L1 Data-Cache: (32 KiB, 8-way associativity, direct-mapped)\n",
"L2 Unified-Cache: (1024 KiB, 16-way associativity, direct-mapped)\n",
"L3 Unified-Cache: (24 MiB, 11-way associativity, hash-based-mapping)\n",
"```\n",
"\n",
"Memory is restricted to 64MiB RAM using cgroups:\n",
"\n",
"```\n",
"sudo cgcreate -t $USER:$USER -a $USER:$USER -g memory:limited\n",
"echo 67108864 > /sys/fs/cgroup/memory/limited/memory.limit_in_bytes\n",
"cgexec -g memory:limited ./fft\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fft_heap = np.fromstring(\"\"\"\n",
"1\t0.00034372\n",
"2\t0.000039486\n",
"3\t0.000035912\n",
"4\t0.000037605\n",
"5\t0.000041504\n",
"6\t0.000042154\n",
"7\t0.000055761\n",
"8\t0.000055802\n",
"9\t0.000086509\n",
"10\t0.000104544\n",
"11\t0.000249147\n",
"12\t0.000277132\n",
"13\t0.000917944\n",
"14\t0.001096818\n",
"15\t0.00341159\n",
"16\t0.004738028\n",
"17\t0.015229078\n",
"18\t0.020695995\n",
"19\t0.070773651\n",
"20\t0.091959886\n",
"21\t30.928260041\n",
"22\t40.091185241\n",
"23\t163.181767742\n",
"24\t184.1793349\n",
"25\t832.334289622\n",
"\"\"\", sep=' ').reshape((-1, 2))[:,1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fft_mmap = np.fromstring(\"\"\"\n",
"1\t0.000327282\n",
"2\t0.000035651\n",
"3\t0.000028616\n",
"4\t0.000040656\n",
"5\t0.000043662\n",
"6\t0.000036332\n",
"7\t0.000054138\n",
"8\t0.000056564\n",
"9\t0.000078225\n",
"10\t0.000108604\n",
"11\t0.000244958\n",
"12\t0.000287139\n",
"13\t0.000859705\n",
"14\t0.001116579\n",
"15\t0.003552164\n",
"16\t0.004715079\n",
"17\t0.015413409\n",
"18\t0.021136953\n",
"19\t0.073038566\n",
"20\t0.092504724\n",
"21\t76.226779587\n",
"22\t68.887253346\n",
"23\t726.08383654\n",
"24\t527.209818911\n",
"\"\"\", sep=' ').reshape((-1, 2))[:,1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"transpose_heap = np.fromstring(\"\"\"\n",
"1\t0.000000407\n",
"2\t0.000000201\n",
"3\t0.000000508\n",
"4\t0.000000106\n",
"5\t0.000000378\n",
"6\t0.000000184\n",
"7\t0.000004731\n",
"8\t0.000000604\n",
"9\t0.000007464\n",
"10\t0.000001546\n",
"11\t0.000031108\n",
"12\t0.000005326\n",
"13\t0.00012615\n",
"14\t0.000028176\n",
"15\t0.000708114\n",
"16\t0.000138124\n",
"17\t0.003178672\n",
"18\t0.000945967\n",
"19\t0.015287759\n",
"20\t0.005595849\n",
"21\t10.570897667\n",
"22\t11.281882518\n",
"23\t48.99322635\n",
"24\t44.853152764\n",
"25\t685.421398633\n",
"\"\"\", sep=' ').reshape((-1, 2))[:,1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"transpose_mmap = np.fromstring(\"\"\"\n",
"1\t0.000001035\n",
"2\t0.000000202\n",
"3\t0.000000511\n",
"4\t0.000000152\n",
"5\t0.000000656\n",
"6\t0.000000288\n",
"7\t0.000007064\n",
"8\t0.000000804\n",
"9\t0.000011185\n",
"10\t0.000002899\n",
"11\t0.000031892\n",
"12\t0.00000538\n",
"13\t0.000134255\n",
"14\t0.000028246\n",
"15\t0.000710412\n",
"16\t0.000142058\n",
"17\t0.003230439\n",
"18\t0.001012398\n",
"19\t0.015829714\n",
"20\t0.005500206\n",
"21\t10.533343567\n",
"22\t11.358831156\n",
"23\t48.803206975\n",
"24\t44.780711319\n",
"25\t186.202560067\n",
"26\t145.289483772\n",
"\"\"\", sep=' ').reshape((-1, 2))[:,1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"init_plot(ram=64*1024**2)\n",
"series(fft_heap, 'tab:red')\n",
"series(fft_mmap, 'tab:blue')\n",
"series(transpose_heap, 'tab:orange')\n",
"series(transpose_mmap, 'tab:cyan')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Benchmark single thread"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"c5.2xlarge"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fft = np.fromstring(\"\"\"\n",
"1\t0.000010837\n",
"2\t0.00003163\n",
"3\t0.000021725\n",
"4\t0.000027168\n",
"5\t0.000025975\n",
"6\t0.000034325\n",
"7\t0.000043394\n",
"8\t0.000069395\n",
"9\t0.00013186\n",
"10\t0.000252507\n",
"11\t0.000502172\n",
"12\t0.001078727\n",
"13\t0.00234277\n",
"14\t0.00504515\n",
"15\t0.010528185\n",
"16\t0.022451856\n",
"17\t0.046697365\n",
"18\t0.099328349\n",
"19\t0.207932491\n",
"20\t0.444358145\n",
"21\t0.93529772\n",
"22\t1.964368904\n",
"23\t4.132257624\n",
"24\t8.600371875\n",
"25\t18.178107202\n",
"26\t37.357505465\n",
"\"\"\", sep=' ').reshape((-1, 2))[:,1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fft_iterative = np.fromstring(\"\"\"\n",
"1\t0.000005481\n",
"2\t0.000004502\n",
"3\t0.00000692\n",
"4\t0.000008215\n",
"5\t0.000011034\n",
"6\t0.000020269\n",
"7\t0.000032115\n",
"8\t0.000056194\n",
"9\t0.000098589\n",
"10\t0.000203465\n",
"11\t0.000447601\n",
"12\t0.000986478\n",
"13\t0.002059617\n",
"14\t0.004453699\n",
"15\t0.009803471\n",
"16\t0.022309177\n",
"17\t0.047588306\n",
"18\t0.106476312\n",
"19\t0.26432274\n",
"20\t0.60108076\n",
"21\t1.2733614389999999\n",
"22\t2.688171277\n",
"23\t5.689161517\n",
"24\t12.132268398\n",
"25\t25.850465222\n",
"26\t55.871878074\n",
"\"\"\", sep=' ').reshape((-1, 2))[:,1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fft_depth_first = np.fromstring(\"\"\"\n",
"1\t0.000005586\n",
"2\t0.000005259\n",
"3\t0.000009514\n",
"4\t0.000010033\n",
"5\t0.000013038\n",
"6\t0.000027076\n",
"7\t0.000037728\n",
"8\t0.000062199\n",
"9\t0.000136259\n",
"10\t0.000282032\n",
"11\t0.000628744\n",
"12\t0.001382235\n",
"13\t0.003047723\n",
"14\t0.006714292\n",
"15\t0.014992353\n",
"16\t0.033965664\n",
"17\t0.073531157\n",
"18\t0.170479579\n",
"19\t0.409346494\n",
"20\t1.049937156\n",
"21\t2.444037146\n",
"22\t5.505871476\n",
"23\t12.131428218\n",
"24\t26.768131456\n",
"25\t61.079266758\n",
"26\t133.57830445\n",
"\"\"\", sep=' ').reshape((-1, 2))[:,1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fft_recursive = np.fromstring(\"\"\"\n",
"1\t0.000005541\n",
"2\t0.000004514\n",
"3\t0.000004572\n",
"4\t0.000005506\n",
"5\t0.000008072\n",
"6\t0.000014245\n",
"7\t0.000031183\n",
"8\t0.00006486\n",
"9\t0.000143123\n",
"10\t0.000324709\n",
"11\t0.000689698\n",
"12\t0.00150638\n",
"13\t0.003293801\n",
"14\t0.007106039\n",
"15\t0.01528987\n",
"16\t0.032758035\n",
"17\t0.069945614\n",
"18\t0.148304918\n",
"19\t0.315367176\n",
"20\t0.670398505\n",
"21\t1.417594193\n",
"22\t2.980605325\n",
"23\t6.24611864\n",
"24\t13.096406455\n",
"25\t27.498664066\n",
"26\t57.386700655\n",
"\"\"\", sep=' ').reshape((-1, 2))[:,1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"init_plot()\n",
"series(fft, 'black')\n",
"series(fft_iterative, 'tab:blue')\n",
"series(fft_depth_first, 'tab:orange')\n",
"series(fft_recursive, 'tab:pink')\n",
"series(fft2_heap, 'tab:cyan')"
]
},
{
"cell_type": "markdown",
"metadata": {
"toc-hr-collapsed": true,
"toc-nb-collapsed": true
},
"source": [
"## Memory access pattern"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def fft_df(values, size, offset, stride, loop):\n",
" if size == 1:\n",
" values += [offset]\n",
" else:\n",
" if stride == loop and loop < 128:\n",
" fft_df(values, size // 2, offset, 2 * stride, 2 * loop)\n",
" else:\n",
" fft_df(values, size // 2, offset, 2 * stride, loop)\n",
" fft_df(values, size // 2, offset + stride, 2 * stride, loop)\n",
" for i in range(size // 2):\n",
" for j in range(loop):\n",
" values += [offset + 2 * i * stride + j]\n",
" values += [offset + 2 * i * stride + j + stride]\n",
" return values"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"a = fft_df([], 16384, 0, 1, 1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.plot(a, linestyle='', marker='.')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"2**15 / 64 / 8"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Threads"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fft_threads = np.fromstring(\"\"\"\n",
"24\t8.602388099\n",
"24\t4.433958019\n",
"24\t3.055124852\n",
"24\t2.35013958\n",
"24\t2.18961738\n",
"24\t2.051058217\n",
"24\t1.933472554\n",
"24\t1.826348783\n",
"24\t1.8278633229999999\n",
"24\t1.837789269\n",
"\"\"\", sep=' ').reshape((-1, 2))[:,1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"threads = np.array(range(1,20))\n",
"plt.xticks(threads, [str(n) for n in threads])\n",
"plt.axvline(4, color='grey', linestyle='--')\n",
"plt.text(4, 1, 'Cores')\n",
"plt.axvline(8, color='grey', linestyle='--')\n",
"plt.text(8, 1, 'Hyper threads')\n",
"plt.plot(threads[:len(fft_threads)], fft_threads * threads[:len(fft_threads)] / fft_threads[0], marker = '.')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"threads[:len(fft_threads)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"2.99\n",
"\n",
"\n",
"$69.7 billion"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"104 / 2.3"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 4
}