Profiling notebook#
This notebook collects and compares the run times of several other notebooks, which are specified in the experiment_notebooks variable (see Configuration).
Simply run the whole notebook, and the results will be displayed in tables at the end.
Each notebook listed in experiment_notebooks (see Configuration) must have a run_experiment() function defined. Optionally, it can also define a close_experiment() function.
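For example, a profiled notebook is expected to end with definitions along these lines (a minimal sketch; the function bodies are placeholders, only the names matter to the profiler):

# Minimal sketch of the interface a profiled notebook provides; the bodies are placeholders.
def run_experiment():
    # Required: run the measurement/schedule to be profiled.
    ...

def close_experiment():
    # Optional: clean up instruments or other resources afterwards.
    ...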
The profiler profiles the run_experiment function and, once it has finished, calls close_experiment (if it exists). The close_experiment function is not mandatory, but if any resources need to be closed after the run, you can implement that there. The profiler measures the times of the calls listed in the methods variable (see Configuration), as well as the total run time.
After the profiling is done, the notebook generates a file in this directory for each profiled notebook, containing the detailed profiling report. For the notebook <notebook>.ipynb it generates the file <notebook>.ipynb.prof, which can be opened with snakeviz (pip install snakeviz): snakeviz <notebook>.ipynb.prof.
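The report can also be inspected without snakeviz, using the standard-library pstats module; for example, for the first notebook in the list:

import pstats

# Print the ten most expensive calls (by cumulative time) from a generated report.
stats = pstats.Stats("./simple_binned_acquisition.ipynb.prof")
stats.sort_stats("cumulative").print_stats(10)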
Configuration#
# `benchmark_mode` sets whether we run the schedules in benchmark mode.
# In benchmark mode, we overwrite the reference measurements file
# with the current timing values, and those become the new reference values.
benchmark_mode = False

profiling_reference_filename = "profiling_reference_values.pickle"

# How many times each notebook is run. (The results are averaged.)
samples = 10

# The result tables color each cell based on how the measured time compares
# to the reference. Each value's "sigma" is effectively its measurement error:
# if the current time lies above/below
# `reference value ± sigma * sigma_multiplier_threshold`,
# the cell is highlighted (red if slower, green if faster).
sigma_multiplier_threshold = 2.0  # 2.0 is a reasonable value.
# Notebooks to profile.
experiment_notebooks = [
    "./simple_binned_acquisition.ipynb",
    "./resonator_spectroscopy.ipynb",
    "./random_gates.ipynb",
]

# Calls to time, as (label, (class name, method name)) pairs.
# A class name of `None` matches any function with the given name.
methods = [
    ("compile", ("QuantifyCompiler", "compile")),
    ("prepare", ("InstrumentCoordinator", "prepare")),
    ("schedule", (None, "create_schedule")),
    ("run", ("InstrumentCoordinator", "start")),
    ("process", ("InstrumentCoordinator", "retrieve_acquisition")),
]
Loading reference data#
# Reference values for profiling.
# Each notebook has a reference timing value.
import pickle
from os.path import exists

if not benchmark_mode:
    if not exists(profiling_reference_filename):
        raise RuntimeError(
            f"Reference file '{profiling_reference_filename}' does not exist! "
            f"Make sure this file is created by first running the profiling with 'benchmark_mode=True'!"
        )
    with open(profiling_reference_filename, "rb") as f:
        reference = pickle.load(f)
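For orientation, the pickled reference has the same structure as the measured_data produced below: one (notebook, times, total_time) entry per profiled notebook, where every timing is an (expected value, sigma) pair in seconds. A hypothetical example (the numbers are made up):

# Hypothetical contents of `profiling_reference_values.pickle` (illustrative numbers only).
reference_example = [
    (
        "./simple_binned_acquisition.ipynb",
        [(0.91, 0.03), (0.12, 0.01), (0.05, 0.01), (0.40, 0.02), (0.02, 0.01)],  # one (mean, sigma) per entry in `methods`
        (2.1, 0.1),  # (mean, sigma) of the total run time
    ),
]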
Profiling functions#
import cProfile
import pstats
import qcodes
import importlib.util
import inspect

import numpy as np
import pandas as pd


def stat_experiment(experiment_notebook):
    """Run one notebook and profile its `run_experiment()` call."""
    # Close any instruments left over from a previous run.
    qcodes.instrument.Instrument.close_all()
    # Execute the notebook; this defines `run_experiment` (and possibly `close_experiment`).
    %run $experiment_notebook
    with cProfile.Profile() as pr:
        run_experiment()
    if "close_experiment" in globals():
        close_experiment()
    return pstats.Stats(pr)
def match_class_method(class_name, method_name, module_path, line_number):
    """Check whether the profiled entry at (module_path, line_number) is `class_name.method_name`."""
    module_name = inspect.getmodulename(module_path)
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    module = importlib.util.module_from_spec(spec)
    try:
        spec.loader.exec_module(module)
    except Exception:
        # Not every profiled file can be imported in isolation; skip those.
        # print(f"WARNING {class_name, method_name, module_path, line_number}")
        return False
    classes = inspect.getmembers(module, inspect.isclass)
    for _, cls in classes:
        class_methods = inspect.getmembers(cls, inspect.isfunction)
        for method in class_methods:
            if method[0] == method_name:
                _, start_line = inspect.getsourcelines(method[1])
                if (class_name == cls.__name__) and (start_line == line_number):
                    return True
    return False
def get_stat(stats, class_name, method_name):
    """Find the pstats entry for `class_name.method_name` (any function named `method_name` if class_name is None)."""
    # `stats.stats` maps (module path, line number, function name)
    # to (primitive call count, total call count, own time, cumulative time, callers).
    for stat_key in stats.stats:
        module_path = stat_key[0]
        line_number = stat_key[1]
        current_method_name = stat_key[2]
        if method_name == current_method_name:
            if class_name is not None:
                if match_class_method(class_name, method_name, module_path, line_number):
                    return stats.stats[stat_key]
            else:
                return stats.stats[stat_key]
    return None
def expected_value_and_sigma(t_sum, t_sq_sum, samples):
    """Convert running sums of t and t**2 into a (mean, standard deviation) pair."""
    expected_value = t_sum / samples
    sigma = (t_sq_sum / samples - expected_value ** 2) ** 0.5
    return (expected_value, sigma)
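This is the usual running-sums identity sigma^2 = mean(t^2) - mean(t)^2; a quick sanity check against numpy (the timings below are made up):

# Sanity check of the running-sums formula against numpy (illustrative timings, not real measurements).
timings = [1.02, 0.98, 1.05, 1.01]
mean, sigma = expected_value_and_sigma(sum(timings), sum(t ** 2 for t in timings), len(timings))
assert np.isclose(mean, np.mean(timings))
assert np.isclose(sigma, np.std(timings))  # np.std defaults to the population standard deviation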
def stat_experiment_detailed(experiment_notebook, samples):
    total_time = [0, 0]  # running sums of t and t**2 for the total run time
    times = [[0, 0] for _ in range(len(methods))]  # running sums per entry in `methods`
    for sample in range(samples):
        print(f"Running notebook {experiment_notebook} {sample + 1}/{samples}")
        stats = stat_experiment(experiment_notebook)
        for i, method in enumerate(methods):
            current_stats = get_stat(stats, method[1][0], method[1][1])
            if current_stats:
                time = current_stats[3]  # cumulative time of the call
                times[i][0] += time
                times[i][1] += time ** 2
            else:
                times[i][0] = np.nan
                times[i][1] = np.nan
        total_time[0] += stats.total_tt
        total_time[1] += stats.total_tt ** 2
    times = [expected_value_and_sigma(t[0], t[1], samples) for t in times]
    total_time = expected_value_and_sigma(total_time[0], total_time[1], samples)
    stats.dump_stats(f"{experiment_notebook}.prof")
    print(f"Generated `{experiment_notebook}.prof` profiling file")
    return times, total_time
Running the profiling#
measured_data = []
for experiment_notebook in experiment_notebooks:
    times, total_time = stat_experiment_detailed(experiment_notebook, samples=samples)
    measured_data.append((experiment_notebook, times, total_time))
measured_data
if benchmark_mode:
    with open(profiling_reference_filename, "wb") as f:
        pickle.dump(measured_data, f)
    reference = measured_data
Displaying the results#
reference
table = []
header = []
table_diff = []
header_diff = []

# Build the header row: one column per profiled method, plus the total.
header.append("")
header_diff.append("")
for method in methods:
    header.append(method[0])
    header_diff.append(method[0])
header.append("total")
header_diff.append("total")

# Build one row per notebook: measured times, and differences to the reference.
for row_id, (experiment_notebook, times, total_time) in enumerate(measured_data):
    row = []
    row_diff = []
    row.append(experiment_notebook)
    row_diff.append(experiment_notebook)
    for column_id, time in enumerate(times):
        expected_value = time[0]
        sigma = time[1]
        row.append(f"{expected_value:.2g} ± {sigma:.2g} s")
        time_diff = expected_value - reference[row_id][1][column_id][0]
        row_diff.append(f"{time_diff:.2g} ± {sigma:.2g} s")
    row.append(f"{total_time[0]:.2g} ± {total_time[1]:.2g} s")
    total_time_diff = total_time[0] - reference[row_id][2][0]
    row_diff.append(f"{total_time_diff:.2g} ± {total_time[1]:.2g} s")
    table.append(row)
    table_diff.append(row_diff)
def diff_to_style(current, ref):
    """Return a cell background color if `current` differs significantly from `ref`."""
    green = "#d0ffd0"
    red = "#ffd0d0"
    val, sigma = current[0], current[1]
    ref_val, ref_sigma = ref[0], ref[1]
    # Significantly slower than the reference: red.
    if (val - sigma * sigma_multiplier_threshold) > (ref_val + ref_sigma * sigma_multiplier_threshold):
        return f"background-color: {red}"
    # Significantly faster than the reference: green.
    if (val + sigma * sigma_multiplier_threshold) < (ref_val - ref_sigma * sigma_multiplier_threshold):
        return f"background-color: {green}"
    return ""
# Compute a matching table of cell styles by comparing each measurement to the reference.
style_table = []
for row_id, (experiment_notebook, times, total_time) in enumerate(measured_data):
    row = []
    row.append("")
    for column_id, time in enumerate(times):
        if row_id < len(reference) and column_id < len(reference[row_id][1]):
            row.append(diff_to_style(time, reference[row_id][1][column_id]))
        else:
            row.append("")
    if row_id < len(reference):
        row.append(diff_to_style(total_time, reference[row_id][2]))
    else:
        row.append("")
    style_table.append(row)
style_table = np.array(style_table)

style_properties = {"border": "1px solid gray"}
styles = [dict(selector="caption", props=[("text-align", "center"), ("font-size", "200%"), ("color", "black")])]

# With axis=None, `Styler.apply` expects an array of CSS strings with the same shape as the table.
df = pd.DataFrame(table, columns=header)
df = df.style.set_properties(**style_properties).apply(lambda _: style_table, axis=None)
df = df.set_caption("Measured times").set_table_styles(styles)

df_diff = pd.DataFrame(table_diff, columns=header_diff)
df_diff = df_diff.style.set_properties(**style_properties).apply(lambda _: style_table, axis=None)
df_diff = df_diff.set_caption("Measured diffs to reference").set_table_styles(styles)
# If the cell is green (or red), the current time
# is significantly less (or more) than the reference time.
df
# All data is (current_time - reference_time).
# If the cell is green (or red), the current time
# is significantly less (or more) than the reference time.
df_diff