from datasets import load_dataset,load_dataset_builder
import torchvision.transforms.functional as TF
import torch
import torch.nn as nn
import torch.nn.functional as F
Dataloaders
Module containing helper functions and classes around dataloaders
hf_ds_collate_fn
hf_ds_collate_fn (data, flatten=True)
Collation function for building a PyTorch DataLoader from a huggingface dataset. Tries to convert all items from a dataset entry into tensors. PIL images are converted to tensor, either flattened or not
DataLoaders
DataLoaders (train, valid)
Class that exposes two PyTorch dataloaders as train and valid arguments
DataLoaders.from_hf_dd
DataLoaders.from_hf_dd (dd, batch_size, collate_fn=<function hf_ds_collate_fn>, **kwargs)
Factory method to create a Dataloaders object for a Huggingface Dataset dict, uses the hf_ds_collate_func
collation function by default, **kwargs are passed to the DataLoaders
Example usage:
= "fashion_mnist"
name = load_dataset_builder(name)
ds_builder = load_dataset(name) ds_hf
Downloading and preparing dataset fashion_mnist/fashion_mnist (download: 29.45 MiB, generated: 34.84 MiB, post-processed: Unknown size, total: 64.29 MiB) to /root/.cache/huggingface/datasets/fashion_mnist/fashion_mnist/1.0.0/8d6c32399aa01613d96e2cbc9b13638f359ef62bb33612b077b4c247f6ef99c1...
Dataset fashion_mnist downloaded and prepared to /root/.cache/huggingface/datasets/fashion_mnist/fashion_mnist/1.0.0/8d6c32399aa01613d96e2cbc9b13638f359ef62bb33612b077b4c247f6ef99c1. Subsequent calls will reuse this data.
def accuracy(preds, targs):
    """Fraction of predictions whose argmax class equals the target label."""
    hits = preds.argmax(dim=1) == targs
    return hits.float().mean()
def fit(epochs):
    """Train `model` for `epochs` epochs and report per-epoch metrics.

    Relies on module-level globals: `model`, `opt`, `loss_func`, `dls`
    (a DataLoaders with `.train` and `.valid`) and `accuracy`.
    Prints epoch number, mean train loss, mean valid loss and accuracy.

    NOTE(review): the source of this block was garbled by extraction; the
    loop below is the reconstructed, syntactically valid version of the
    obviously intended standard train/eval loop.
    """
    for epoch in range(epochs):
        # --- training pass ---
        model.train()
        n_t = train_loss_s = 0
        for xb, yb in dls.train:
            preds = model(xb)
            train_loss = loss_func(preds, yb)
            train_loss.backward()
            # weight by batch size so the epoch mean is per-sample
            n_t += len(xb)
            train_loss_s += train_loss.item() * len(xb)
            opt.step()
            opt.zero_grad()

        # --- validation pass (no gradients needed) ---
        model.eval()
        n_v = valid_loss_s = acc_s = 0
        for xb, yb in dls.valid:
            with torch.no_grad():
                preds = model(xb)
                valid_loss = loss_func(preds, yb)
            n_v += len(xb)
            valid_loss_s += valid_loss.item() * len(xb)
            acc_s += accuracy(preds, yb) * len(xb)

        train_loss = train_loss_s / n_t
        valid_loss = valid_loss_s / n_v
        acc = acc_s / n_v
        print(f'{epoch=} | {train_loss=:.3f} | {valid_loss=:.3f} | {acc=:.3f}')
def get_model_opt():
    """Build a one-hidden-layer MLP and an SGD optimizer for it.

    Reads the module-level hyperparameters `n_in`, `n_h`, `n_out` and `lr`.

    Returns:
        (model, opt): an `nn.Sequential(Linear, ReLU, Linear)` and a
        `torch.optim.SGD` over its parameters.
    """
    layers = [nn.Linear(n_in, n_h), nn.ReLU(), nn.Linear(n_h, n_out)]
    model = nn.Sequential(*layers)
    opt = torch.optim.SGD(model.parameters(), lr)
    return model, opt
# Hyperparameters and training setup (reconstructed from the garbled export:
# each value was split from its name across adjacent lines).
n_in = 28 * 28          # flattened 28x28 image
n_h = 50                # hidden layer width
n_out = 10              # number of classes
lr = 0.01
bs = 1024
loss_func = F.cross_entropy

model, opt = get_model_opt()
dls = DataLoaders.from_hf_dd(ds_hf, bs)
fit(1)
# Example output:
# epoch=0 | train_loss=2.185 | valid_loss=2.070 | acc=0.407