TensorFlow (MNIST)#

A typical use-case with Tensorflow would look something like this

import tensorflow as tf
import pyhopper
import numpy as np

def get_data(for_validation=True):
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
    x_train = x_train.reshape((-1, 28 * 28)) / 255.0
    x_test = x_test.reshape((-1, 28 * 28)) / 255.0
    if for_validation:
        val_size = int(0.05 * x_train.shape[0])
        shuffle = np.random.default_rng(12345).permutation(x_train.shape[0])
        x_train, y_train = x_train[shuffle], y_train[shuffle]
        x_val, y_val = x_train[:val_size], y_train[:val_size]
        x_train, y_train = x_train[val_size:], y_train[val_size:]
        return x_train, y_train, x_val, y_val
    else:
        return x_train, y_train, x_test, y_test


def train_mnist_mlp(params, for_validation=True):
    x_train, y_train, x_val, y_val = get_data(for_validation)
    input_tensor = tf.keras.Input((28 * 28))
    x = input_tensor
    for i in range(params["num_layers"]):
        x = tf.keras.layers.Dense(
            params["size"][i],
            activation=params["activation"],
            kernel_regularizer=tf.keras.regularizers.l2(params["weight_decay"]),
        )(x)
        x = tf.keras.layers.Dropout(params["dropout"])(x)
    x = tf.keras.layers.Dense(10, "softmax")(x)
    model = tf.keras.Model(input_tensor, x)

    lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
        params["lr_init"],
        params["epochs"] * len(x_train) // params["batch_size"],
        params["alpha"],
    )
    model.compile(
        loss=tf.keras.losses.sparse_categorical_crossentropy,
        metrics=[tf.keras.metrics.sparse_categorical_accuracy],
        optimizer=tf.keras.optimizers.Adam(lr_schedule),
    )
    model.fit(
        x_train,
        y_train,
        batch_size=params["batch_size"],
        epochs=params["epochs"],
        validation_data=None if for_validation else (x_val, y_val),
        verbose=0,
    )
    _, val_acc = model.evaluate(x_val, y_val, verbose=0)
    return val_acc


search = pyhopper.Search(
    {
        "activation": pyhopper.choice(["relu", "swish", "gelu", "elu"]),
        "num_layers": pyhopper.int(1, 5),
        "size": pyhopper.int(64, 256, multiple_of=16, shape=5),
        "dropout": pyhopper.float(0, 0.5, precision=1),
        "lr_init": pyhopper.float(0.005, 0.0005, log=True),
        "alpha": pyhopper.choice([0, 1e-3, 1e-2, 1 - 1], is_ordinal=True),
        "weight_decay": pyhopper.float(1e-6, 1e-2, log=True, precision=1),
        "batch_size": 32,
        "epochs": 30,
    }
)
best_params = search.run(
    pyhopper.wrap_n_times(train_mnist_mlp, n=3, yield_after=0),
    direction="max",
    runtime="4h",
    n_jobs="per-gpu",
    canceler=pyhopper.pruners.QuantilePruner(0.6),
)
test_acc = train_mnist_mlp(best_params, for_validation=False)
print(f"Tuned params test accuracy: {100*test_acc:0.2f}%")
print("best", best_params)

Outputs

> Search is scheduled for 04:00:00 (h:m:s)
> Best f: 0.989 (out of 127 params):  98%|█████████▊| [3:56:08<03:49, 52.3 s/param]
> ============================ Summary ===========================
> Mode              : Best f : Steps : Pruned   : Time
> ----------------  : ----   : ----  : ----     : ----
> Initial solution  : 0.983  : 1     : 0        : 09:48 (m:s)
> Random seeding    : 0.986  : 26    : 48       : 04:39:53 (h:m:s)
> Local sampling    : 0.989  : 100   : 96       : 16:03:12 (h:m:s)
> ----------------  : ----   : ----  : ----     : ----
> Total             : 0.989  : 127   : 144      : 03:56:10 (h:m:s)
> ================================================================
>
> Tuned params test accuracy: 98.74%
> best {'activation': 'swish', 'num_layers': 2, 'size': array([240, 208, 160, 128, 144]), 'dropout': 0.1, 'lr_init': 0.00239950593715168, 'alpha': 0.001, 'weight_decay': 2e-05, 'batch_size': 32, 'epochs': 30}

Note

Achieving a >99.5% accuracy on MNIST is possible with a convolutional neural network