from datasets import load_dataset,load_dataset_builder
import torchvision.transforms.functional as TF
import torch
import torch.nn as nn
import torch.nn.functional as F
Dataloaders
Module containing helper functions and classes around dataloaders
hf_ds_collate_fn
hf_ds_collate_fn (data, flatten=True)
Collation function for building a PyTorch DataLoader from a Hugging Face dataset. Tries to convert every item in a dataset entry to a tensor; PIL images are converted to tensors, either flattened or not.
DataLoaders
DataLoaders (train, valid)
Class that exposes two PyTorch dataloaders as train and valid arguments
DataLoaders.from_hf_dd
DataLoaders.from_hf_dd (dd, batch_size, collate_fn=<function hf_ds_collate_fn>, **kwargs)
Factory method to create a DataLoaders object from a Hugging Face dataset dict; uses the hf_ds_collate_fn collation function by default. **kwargs are passed to the DataLoaders
Example usage:
name = "fashion_mnist"
ds_builder = load_dataset_builder(name)
ds_hf = load_dataset(name)
Downloading and preparing dataset fashion_mnist/fashion_mnist (download: 29.45 MiB, generated: 34.84 MiB, post-processed: Unknown size, total: 64.29 MiB) to /root/.cache/huggingface/datasets/fashion_mnist/fashion_mnist/1.0.0/8d6c32399aa01613d96e2cbc9b13638f359ef62bb33612b077b4c247f6ef99c1...
Dataset fashion_mnist downloaded and prepared to /root/.cache/huggingface/datasets/fashion_mnist/fashion_mnist/1.0.0/8d6c32399aa01613d96e2cbc9b13638f359ef62bb33612b077b4c247f6ef99c1. Subsequent calls will reuse this data.
def accuracy(preds, targs):
    "Fraction of rows in `preds` whose argmax equals the label in `targs`."
    hits = preds.argmax(dim=1) == targs
    return hits.float().mean()
def fit(epochs):
    """Train for `epochs` passes over `dls.train`, validating on `dls.valid`.

    Relies on module-level globals: `model`, `dls`, `opt`, `loss_func`,
    `accuracy`. Prints the per-epoch mean train loss, valid loss and
    accuracy (batch-size weighted so a smaller last batch is handled
    exactly).
    """
    for epoch in range(epochs):
        # --- training pass ---
        model.train()
        n_t = train_loss_s = 0
        for xb, yb in dls.train:
            preds = model(xb)
            train_loss = loss_func(preds, yb)
            train_loss.backward()
            n_t += len(xb)
            # weight by batch size so the epoch mean is exact
            train_loss_s += train_loss.item() * len(xb)
            opt.step()
            opt.zero_grad()
        # --- validation pass ---
        model.eval()
        n_v = valid_loss_s = acc_s = 0
        # no gradients are needed anywhere during evaluation, so cover the
        # whole loop (the original left the accuracy computation outside)
        with torch.no_grad():
            for xb, yb in dls.valid:
                preds = model(xb)
                valid_loss = loss_func(preds, yb)
                n_v += len(xb)
                valid_loss_s += valid_loss.item() * len(xb)
                # .item() keeps the accumulator a plain float, not a tensor
                acc_s += accuracy(preds, yb).item() * len(xb)
        train_loss = train_loss_s / n_t
        valid_loss = valid_loss_s / n_v
        acc = acc_s / n_v
        print(f'{epoch=} | {train_loss=:.3f} | {valid_loss=:.3f} | {acc=:.3f}')
def get_model_opt(n_in=None, n_h=None, n_out=None, lr=None):
    """Build a one-hidden-layer MLP and an SGD optimizer for it.

    Each argument falls back to the module-level constant of the same name
    (looked up at call time, so the constants may be defined after this
    function). `get_model_opt()` therefore behaves exactly as before, while
    explicit arguments allow other sizes/learning rates.

    Returns:
        (model, opt): an `nn.Sequential` Linear->ReLU->Linear network and a
        `torch.optim.SGD` optimizer over its parameters.
    """
    cfg = globals()
    n_in = cfg['n_in'] if n_in is None else n_in
    n_h = cfg['n_h'] if n_h is None else n_h
    n_out = cfg['n_out'] if n_out is None else n_out
    lr = cfg['lr'] if lr is None else lr
    layers = [nn.Linear(n_in, n_h), nn.ReLU(), nn.Linear(n_h, n_out)]
    model = nn.Sequential(*layers)
    opt = torch.optim.SGD(model.parameters(), lr)
    return model, opt
# Hyperparameters for the fashion_mnist MLP.
n_in = 28*28  # flattened 28x28 image -> 784 input features
n_h = 50      # hidden-layer width
n_out = 10    # number of output classes
lr = 0.01     # SGD learning rate
bs = 1024     # batch size
loss_func = F.cross_entropy
model, opt = get_model_opt()
# Wrap the Hugging Face dataset dict in train/valid PyTorch dataloaders.
dls = DataLoaders.from_hf_dd(ds_hf, bs)
fit(1)
epoch=0 | train_loss=2.185 | valid_loss=2.070 | acc=0.407