TensorFlow (MNIST)
A typical use case with TensorFlow looks something like this:
import tensorflow as tf
import pyhopper
import numpy as np
def get_data(for_validation=True):
    """Load MNIST as flattened, [0, 1]-scaled images plus an evaluation split.

    Args:
        for_validation: If true, hold out 5% of the (shuffled) training set
            and return it as the evaluation split. If false, return the full
            training set together with the official test set.

    Returns:
        A tuple ``(x_train, y_train, x_eval, y_eval)``.
    """
    train_split, test_split = tf.keras.datasets.mnist.load_data()
    images, labels = train_split
    test_images, test_labels = test_split

    # Flatten each 28x28 image into a 784-vector and rescale by 1/255.
    images = images.reshape((-1, 28 * 28)) / 255.0
    test_images = test_images.reshape((-1, 28 * 28)) / 255.0

    if not for_validation:
        return images, labels, test_images, test_labels

    num_samples = images.shape[0]
    val_size = int(0.05 * num_samples)
    # Fixed seed so every call produces the same train/validation partition.
    order = np.random.default_rng(12345).permutation(num_samples)
    val_idx, train_idx = order[:val_size], order[val_size:]
    return images[train_idx], labels[train_idx], images[val_idx], labels[val_idx]
def train_mnist_mlp(params, for_validation=True):
    """Train an MLP on MNIST and return its accuracy on the evaluation split.

    Args:
        params: Mapping of hyperparameters. Keys read here: ``num_layers``,
            ``size`` (indexable; one width per hidden layer), ``activation``,
            ``weight_decay``, ``dropout``, ``lr_init``, ``alpha``, ``epochs``
            and ``batch_size``.
        for_validation: Forwarded to ``get_data``. If true, the model is
            evaluated on a held-out part of the training set; otherwise it is
            trained on the full training set and evaluated on the test set.

    Returns:
        The accuracy reported by ``model.evaluate`` on the evaluation split.
    """
    x_train, y_train, x_val, y_val = get_data(for_validation)

    # ``shape`` must be a tuple; ``(28 * 28)`` without the trailing comma is
    # just the integer 784.
    input_tensor = tf.keras.Input(shape=(28 * 28,))
    x = input_tensor
    for i in range(params["num_layers"]):
        x = tf.keras.layers.Dense(
            # Cast because the sampled widths may be NumPy integers.
            int(params["size"][i]),
            activation=params["activation"],
            kernel_regularizer=tf.keras.regularizers.l2(params["weight_decay"]),
        )(x)
        x = tf.keras.layers.Dropout(params["dropout"])(x)
    x = tf.keras.layers.Dense(10, activation="softmax")(x)
    model = tf.keras.Model(input_tensor, x)

    # Decay the learning rate over (approximately) the total number of
    # optimizer steps of the whole training run.
    lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
        initial_learning_rate=params["lr_init"],
        decay_steps=params["epochs"] * len(x_train) // params["batch_size"],
        alpha=params["alpha"],
    )
    model.compile(
        loss=tf.keras.losses.sparse_categorical_crossentropy,
        metrics=[tf.keras.metrics.sparse_categorical_accuracy],
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
    )
    model.fit(
        x_train,
        y_train,
        batch_size=params["batch_size"],
        epochs=params["epochs"],
        # Per-epoch evaluation only on the held-out validation split; the
        # test set is touched exactly once, in the final evaluate() below.
        validation_data=(x_val, y_val) if for_validation else None,
        verbose=0,
    )
    _, val_acc = model.evaluate(x_val, y_val, verbose=0)
    return val_acc
# Hyperparameter search space consumed by train_mnist_mlp.
search = pyhopper.Search(
    {
        "activation": pyhopper.choice(["relu", "swish", "gelu", "elu"]),
        "num_layers": pyhopper.int(1, 5),
        # Five widths are always sampled; train_mnist_mlp only reads the
        # first ``num_layers`` of them.
        "size": pyhopper.int(64, 256, multiple_of=16, shape=5),
        "dropout": pyhopper.float(0, 0.5, precision=1),
        # Bounds given as (lower, upper).
        "lr_init": pyhopper.float(0.0005, 0.005, log=True),
        # Four distinct, increasing values (the last one is 1e-1, not 1 - 1,
        # which would evaluate to 0 and duplicate the first entry).
        "alpha": pyhopper.choice([0, 1e-3, 1e-2, 1e-1], is_ordinal=True),
        "weight_decay": pyhopper.float(1e-6, 1e-2, log=True, precision=1),
        # Fixed (non-tuned) training settings.
        "batch_size": 32,
        "epochs": 30,
    }
)
# Maximize the accuracy returned by train_mnist_mlp within a 4 hour budget.
# NOTE(review): wrap_n_times presumably repeats the objective n times and,
# with yield_after=0, exposes intermediate results so the pruner can stop
# unpromising candidates early -- confirm against the PyHopper docs.
best_params = search.run(
    pyhopper.wrap_n_times(train_mnist_mlp, n=3, yield_after=0),
    direction="max",
    runtime="4h",
    n_jobs="per-gpu",
    pruner=pyhopper.pruners.QuantilePruner(0.6),
)
# Retrain with the best configuration on the full training set and report
# the accuracy on the test set.
test_acc = train_mnist_mlp(best_params, for_validation=False)
print(f"Tuned params test accuracy: {100*test_acc:0.2f}%")
print("best", best_params)
Outputs
> Search is scheduled for 04:00:00 (h:m:s)
> Best f: 0.989 (out of 127 params): 98%|█████████▊| [3:56:08<03:49, 52.3 s/param]
> ============================ Summary ===========================
> Mode : Best f : Steps : Pruned : Time
> ---------------- : ---- : ---- : ---- : ----
> Initial solution : 0.983 : 1 : 0 : 09:48 (m:s)
> Random seeding : 0.986 : 26 : 48 : 04:39:53 (h:m:s)
> Local sampling : 0.989 : 100 : 96 : 16:03:12 (h:m:s)
> ---------------- : ---- : ---- : ---- : ----
> Total : 0.989 : 127 : 144 : 03:56:10 (h:m:s)
> ================================================================
>
> Tuned params test accuracy: 98.74%
> best {'activation': 'swish', 'num_layers': 2, 'size': array([240, 208, 160, 128, 144]), 'dropout': 0.1, 'lr_init': 0.00239950593715168, 'alpha': 0.001, 'weight_decay': 2e-05, 'batch_size': 32, 'epochs': 30}
Note
Achieving an accuracy above 99.5% on MNIST is possible with a convolutional neural network.