Profiling notebook#
This notebook collects and compares the run times of several other notebooks, which are specified in the experiment_notebooks variable (see Configuration).
Simply run the whole notebook, and the results will be displayed in tables at the end.
Each notebook listed in experiment_notebooks (see Configuration) must have a run_experiment() function defined. Optionally, it can also define a close_experiment() function.
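For example, a profiled notebook is expected to end with definitions along these lines (a minimal sketch; the function bodies are placeholders, only the names matter to the profiler):

# Minimal sketch of the interface a profiled notebook provides; the bodies are placeholders.
def run_experiment():
    # Required: run the measurement/schedule to be profiled.
    ...

def close_experiment():
    # Optional: clean up instruments or other resources afterwards.
    ...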
The profiler profiles the run_experiment function and, once it has finished, calls close_experiment (if it exists). The close_experiment function is not mandatory, but if any resources need to be closed after the run, you can implement that there. The profiler measures the times of the calls listed in the methods variable (see Configuration), as well as the total run time.
After the profiling is done, the notebook generates a file in this directory for each profiled notebook, containing the detailed profiling report. For the notebook <notebook>.ipynb it generates the file <notebook>.ipynb.prof, which can be opened with snakeviz (pip install snakeviz): snakeviz <notebook>.ipynb.prof.
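The report can also be inspected without snakeviz, using the standard-library pstats module; for example, for the first notebook in the list:

import pstats

# Print the ten most expensive calls (by cumulative time) from a generated report.
stats = pstats.Stats("./simple_binned_acquisition.ipynb.prof")
stats.sort_stats("cumulative").print_stats(10)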
Configuration#
# `benchmark_mode` sets whether we run the schedules in benchmark mode.
# In benchmark mode, we overwrite the reference measurements file
# with the current timing values, and those become the new reference values.
benchmark_mode = False

profiling_reference_filename = "profiling_reference_values.pickle"

# How many times each notebook is run. (The results are averaged.)
samples = 10

# The result tables color each cell based on how the measured time compares
# to the reference. Each value's "sigma" is effectively its measurement error:
# if the current time lies above/below
# `reference value ± sigma * sigma_multiplier_threshold`,
# the cell is highlighted (red if slower, green if faster).
sigma_multiplier_threshold = 2.0  # 2.0 is a reasonable value.
# Notebooks to profile.
experiment_notebooks = [
    "./simple_binned_acquisition.ipynb",
    "./resonator_spectroscopy.ipynb",
    "./random_gates.ipynb",
]

# Calls to time, as (label, (class name, method name)) pairs.
# A class name of `None` matches any function with the given name.
methods = [
    ("compile", ("QuantifyCompiler", "compile")),
    ("prepare", ("InstrumentCoordinator", "prepare")),
    ("schedule", (None, "create_schedule")),
    ("run", ("InstrumentCoordinator", "start")),
    ("process", ("InstrumentCoordinator", "retrieve_acquisition")),
]
Loading reference data#
# Reference values for profiling.
# Each notebook has a reference timing value.
import pickle
from os.path import exists

if not benchmark_mode:
    if not exists(profiling_reference_filename):
        raise RuntimeError(
            f"Reference file '{profiling_reference_filename}' does not exist! "
            f"Make sure this file is created by first running the profiling with 'benchmark_mode=True'!"
        )
    with open(profiling_reference_filename, "rb") as f:
        reference = pickle.load(f)
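For orientation, the pickled reference has the same structure as the measured_data produced below: one (notebook, times, total_time) entry per profiled notebook, where every timing is an (expected value, sigma) pair in seconds. A hypothetical example (the numbers are made up):

# Hypothetical contents of `profiling_reference_values.pickle` (illustrative numbers only).
reference_example = [
    (
        "./simple_binned_acquisition.ipynb",
        [(0.91, 0.03), (0.12, 0.01), (0.05, 0.01), (0.40, 0.02), (0.02, 0.01)],  # one (mean, sigma) per entry in `methods`
        (2.1, 0.1),  # (mean, sigma) of the total run time
    ),
]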
Profiling functions#
import cProfile
import pstats
import qcodes
import importlib.util
import inspect

import numpy as np
import pandas as pd


def stat_experiment(experiment_notebook):
    """Run one notebook and profile its `run_experiment()` call."""
    # Close any instruments left over from a previous run.
    qcodes.instrument.Instrument.close_all()
    # Execute the notebook; this defines `run_experiment` (and possibly `close_experiment`).
    %run $experiment_notebook
    with cProfile.Profile() as pr:
        run_experiment()
    if "close_experiment" in globals():
        close_experiment()
    return pstats.Stats(pr)
def match_class_method(class_name, method_name, module_path, line_number):
    """Check whether the profiled entry at (module_path, line_number) is `class_name.method_name`."""
    module_name = inspect.getmodulename(module_path)
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    module = importlib.util.module_from_spec(spec)
    try:
        spec.loader.exec_module(module)
    except Exception:
        # Not every profiled file can be imported in isolation; skip those.
        # print(f"WARNING {class_name, method_name, module_path, line_number}")
        return False
    classes = inspect.getmembers(module, inspect.isclass)
    for _, cls in classes:
        class_methods = inspect.getmembers(cls, inspect.isfunction)
        for method in class_methods:
            if method[0] == method_name:
                _, start_line = inspect.getsourcelines(method[1])
                if (class_name == cls.__name__) and (start_line == line_number):
                    return True
    return False
def get_stat(stats, class_name, method_name):
    """Find the pstats entry for `class_name.method_name` (any function named `method_name` if class_name is None)."""
    # `stats.stats` maps (module path, line number, function name)
    # to (primitive call count, total call count, own time, cumulative time, callers).
    for stat_key in stats.stats:
        module_path = stat_key[0]
        line_number = stat_key[1]
        current_method_name = stat_key[2]
        if method_name == current_method_name:
            if class_name is not None:
                if match_class_method(class_name, method_name, module_path, line_number):
                    return stats.stats[stat_key]
            else:
                return stats.stats[stat_key]
    return None
def expected_value_and_sigma(t_sum, t_sq_sum, samples):
    """Convert running sums of t and t**2 into a (mean, standard deviation) pair."""
    expected_value = t_sum / samples
    sigma = (t_sq_sum / samples - expected_value ** 2) ** 0.5
    return (expected_value, sigma)
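This is the usual running-sums identity sigma^2 = mean(t^2) - mean(t)^2; a quick sanity check against numpy (the timings below are made up):

# Sanity check of the running-sums formula against numpy (illustrative timings, not real measurements).
timings = [1.02, 0.98, 1.05, 1.01]
mean, sigma = expected_value_and_sigma(sum(timings), sum(t ** 2 for t in timings), len(timings))
assert np.isclose(mean, np.mean(timings))
assert np.isclose(sigma, np.std(timings))  # np.std defaults to the population standard deviation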
def stat_experiment_detailed(experiment_notebook, samples):
    total_time = [0, 0]  # running sums of t and t**2 for the total run time
    times = [[0, 0] for _ in range(len(methods))]  # running sums per entry in `methods`
    for sample in range(samples):
        print(f"Running notebook {experiment_notebook} {sample + 1}/{samples}")
        stats = stat_experiment(experiment_notebook)
        for i, method in enumerate(methods):
            current_stats = get_stat(stats, method[1][0], method[1][1])
            if current_stats:
                time = current_stats[3]  # cumulative time of the call
                times[i][0] += time
                times[i][1] += time ** 2
            else:
                times[i][0] = np.nan
                times[i][1] = np.nan
        total_time[0] += stats.total_tt
        total_time[1] += stats.total_tt ** 2
    times = [expected_value_and_sigma(t[0], t[1], samples) for t in times]
    total_time = expected_value_and_sigma(total_time[0], total_time[1], samples)
    stats.dump_stats(f"{experiment_notebook}.prof")
    print(f"Generated `{experiment_notebook}.prof` profiling file")
    return times, total_time
Running the profiling#
measured_data = []
for experiment_notebook in experiment_notebooks:
    times, total_time = stat_experiment_detailed(experiment_notebook, samples=samples)
    measured_data.append((experiment_notebook, times, total_time))
measured_data
if benchmark_mode:
    with open(profiling_reference_filename, "wb") as f:
        pickle.dump(measured_data, f)
    reference = measured_data
Displaying the results#
reference
table = []
header = []
table_diff = []
header_diff = []

# Build the header row: one column per profiled method, plus the total.
header.append("")
header_diff.append("")
for method in methods:
    header.append(method[0])
    header_diff.append(method[0])
header.append("total")
header_diff.append("total")

# Build one row per notebook: measured times, and differences to the reference.
for row_id, (experiment_notebook, times, total_time) in enumerate(measured_data):
    row = []
    row_diff = []
    row.append(experiment_notebook)
    row_diff.append(experiment_notebook)
    for column_id, time in enumerate(times):
        expected_value = time[0]
        sigma = time[1]
        row.append(f"{expected_value:.2g} ± {sigma:.2g} s")
        time_diff = expected_value - reference[row_id][1][column_id][0]
        row_diff.append(f"{time_diff:.2g} ± {sigma:.2g} s")
    row.append(f"{total_time[0]:.2g} ± {total_time[1]:.2g} s")
    total_time_diff = total_time[0] - reference[row_id][2][0]
    row_diff.append(f"{total_time_diff:.2g} ± {total_time[1]:.2g} s")
    table.append(row)
    table_diff.append(row_diff)
def diff_to_style(current, ref):
    """Return a cell background color if `current` differs significantly from `ref`."""
    green = "#d0ffd0"
    red = "#ffd0d0"
    val, sigma = current[0], current[1]
    ref_val, ref_sigma = ref[0], ref[1]
    # Significantly slower than the reference: red.
    if (val - sigma * sigma_multiplier_threshold) > (ref_val + ref_sigma * sigma_multiplier_threshold):
        return f"background-color: {red}"
    # Significantly faster than the reference: green.
    if (val + sigma * sigma_multiplier_threshold) < (ref_val - ref_sigma * sigma_multiplier_threshold):
        return f"background-color: {green}"
    return ""
# Compute a matching table of cell styles by comparing each measurement to the reference.
style_table = []
for row_id, (experiment_notebook, times, total_time) in enumerate(measured_data):
    row = []
    row.append("")
    for column_id, time in enumerate(times):
        if row_id < len(reference) and column_id < len(reference[row_id][1]):
            row.append(diff_to_style(time, reference[row_id][1][column_id]))
        else:
            row.append("")
    if row_id < len(reference):
        row.append(diff_to_style(total_time, reference[row_id][2]))
    else:
        row.append("")
    style_table.append(row)
style_table = np.array(style_table)

style_properties = {"border": "1px solid gray"}
styles = [dict(selector="caption", props=[("text-align", "center"), ("font-size", "200%"), ("color", "black")])]

# With axis=None, `Styler.apply` expects an array of CSS strings with the same shape as the table.
df = pd.DataFrame(table, columns=header)
df = df.style.set_properties(**style_properties).apply(lambda _: style_table, axis=None)
df = df.set_caption("Measured times").set_table_styles(styles)

df_diff = pd.DataFrame(table_diff, columns=header_diff)
df_diff = df_diff.style.set_properties(**style_properties).apply(lambda _: style_table, axis=None)
df_diff = df_diff.set_caption("Measured diffs to reference").set_table_styles(styles)
# If the cell is green (or red), the current time
# is significantly less (or more) than the reference time.
df
# All data is (current_time - reference_time).
# If the cell is green (or red), the current time
# is significantly less (or more) than the reference time.
df_diff