This code incorporates the feedback I received on my previous question about this project.
I've written the initial version of a deep learning project that identifies house plants, developed in a Kaggle Notebook; I eventually plan to convert it into a Flask application. The code runs correctly, but I'd like feedback on its overall quality and on whether it follows appropriate standards and best practices.
Here's the code from my Kaggle Notebook:
# --- Imports ---
import logging
import os
import random
import warnings
from pathlib import Path
from timeit import default_timer as timer
import matplotlib.pyplot as plt
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, random_split
from torchinfo import summary
from torchvision import models, transforms
from torchvision.datasets import ImageFolder
from tqdm.auto import tqdm
# Silence non-critical library warnings to keep the notebook output readable.
warnings.filterwarnings("ignore")
# --- Configuration ---
class TrainingConfig:
"""
Configuration class to hold all hyperparameters and settings.
This avoids "magic numbers" and makes the script easy to modify.
"""
# Directory and Path Configuration
DATA_DIR = Path("/kaggle/input/house-plant-species/house_plant_species")
MODEL_SAVE_PATH = "house_plant_classifier_v1.pth"
# Data Preprocessing and Loading
BATCH_SIZE = 32
TRAIN_SPLIT = 0.8
VAL_SPLIT = 0.1
# Note: Test split is calculated as 1.0 - TRAIN_SPLIT - VAL_SPLIT
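    # os.cpu_count() can return None, hence the fallback value.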
NUM_WORKERS = os.cpu_count() or 2
# Model Architecture
DROPOUT_RATE = 0.2
# Training Configuration
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
LEARNING_RATE = 1e-3
EPOCHS = 10
SEED = 42
# --- Data Loading and Preparation ---
def create_dataloaders(
data_dir: Path,
transform: transforms.Compose,
config: TrainingConfig,
) -> tuple[DataLoader, DataLoader, DataLoader, list[str]]:
"""Creates training, validation, and test DataLoaders from an ImageFolder.
Args:
data_dir: Path to the root data directory.
transform: Torchvision transforms to apply.
config: The configuration object.
Returns:
A tuple of (train_dataloader, val_dataloader, test_dataloader, class_names).
"""
dataset = ImageFolder(root=data_dir, transform=transform)
class_names = dataset.classes
# Data Splitting
total_size = len(dataset)
train_size = int(config.TRAIN_SPLIT * total_size)
val_size = int(config.VAL_SPLIT * total_size)
test_size = total_size - train_size - val_size
    # Seed the generator so the split is reproducible across runs
generator = torch.Generator().manual_seed(config.SEED)
train_data, val_data, test_data = random_split(
dataset, [train_size, val_size, test_size], generator=generator
)
# Create DataLoaders
train_dataloader = DataLoader(
train_data,
batch_size=config.BATCH_SIZE,
shuffle=True,
num_workers=config.NUM_WORKERS,
pin_memory=True,
)
val_dataloader = DataLoader(
val_data,
batch_size=config.BATCH_SIZE,
shuffle=False,
num_workers=config.NUM_WORKERS,
pin_memory=True,
)
# Store test_data inside the dataloader for later visualization
test_dataloader = DataLoader(
test_data,
batch_size=config.BATCH_SIZE,
shuffle=False,
num_workers=config.NUM_WORKERS,
pin_memory=True,
)
test_dataloader.dataset_unbatched = test_data
return train_dataloader, val_dataloader, test_dataloader, class_names
# --- Model Architecture ---
def create_efficientnet_b0(
num_classes: int, config: TrainingConfig
) -> tuple[nn.Module, transforms.Compose]:
"""Creates an EfficientNet-B0 model with a custom classifier head.
Args:
num_classes: Number of output classes for the model.
config: The configuration object.
Returns:
A tuple containing the PyTorch model and the appropriate transforms.
"""
weights = models.EfficientNet_B0_Weights.DEFAULT
model = models.efficientnet_b0(weights=weights).to(config.DEVICE)
# Freeze all base layers
for param in model.features.parameters():
param.requires_grad = False
    # Re-seed before rebuilding the classifier head so its random initialization is reproducible
torch.manual_seed(config.SEED)
model.classifier = nn.Sequential(
nn.Dropout(p=config.DROPOUT_RATE, inplace=True),
nn.Linear(in_features=1280, out_features=num_classes),
).to(config.DEVICE)
return model, weights.transforms()
# --- Training Engine ---
def train_step(
model: nn.Module,
dataloader: DataLoader,
loss_fn: nn.Module,
optimizer: optim.Optimizer,
device: str,
) -> tuple[float, float]:
"""Performs a single training step over one epoch.
Returns:
A tuple containing the average training loss and accuracy per batch.
"""
model.train()
train_loss, train_acc = 0.0, 0.0
for images, labels in dataloader:
images, labels = images.to(device), labels.to(device)
y_pred = model(images)
loss = loss_fn(y_pred, labels)
train_loss += loss.item()
optimizer.zero_grad()
loss.backward()
optimizer.step()
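        # torch.softmax does not change the argmax, so this is equivalent to taking
        # argmax of the raw logits; softmax is kept only to make the
        # logits -> probabilities -> class intent explicit.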
y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
train_acc += (y_pred_class == labels).sum().item() / len(y_pred)
return train_loss / len(dataloader), train_acc / len(dataloader)
def test_step(
model: nn.Module, dataloader: DataLoader, loss_fn: nn.Module, device: str
) -> tuple[float, float]:
"""Performs a single testing/validation step over one epoch.
Returns:
A tuple containing the average test/validation loss and accuracy per batch.
"""
model.eval()
test_loss, test_acc = 0.0, 0.0
with torch.inference_mode():
for images, labels in dataloader:
images, labels = images.to(device), labels.to(device)
test_pred_logits = model(images)
loss = loss_fn(test_pred_logits, labels)
test_loss += loss.item()
test_pred_labels = test_pred_logits.argmax(dim=1)
test_acc += (test_pred_labels == labels).sum().item() / len(test_pred_labels)
return test_loss / len(dataloader), test_acc / len(dataloader)
def train(
model: nn.Module,
train_dataloader: DataLoader,
val_dataloader: DataLoader,
optimizer: optim.Optimizer,
loss_fn: nn.Module,
config: TrainingConfig,
) -> dict[str, list[float]]:
"""Trains and validates a PyTorch model for a given number of epochs."""
results = {"train_loss": [], "train_acc": [], "val_loss": [], "val_acc": []}
for epoch in tqdm(range(config.EPOCHS)):
train_loss, train_acc = train_step(
model, train_dataloader, loss_fn, optimizer, config.DEVICE
)
val_loss, val_acc = test_step(
model, val_dataloader, loss_fn, config.DEVICE
)
# The print statement is kept for immediate feedback in an interactive notebook
print(
f"Epoch: {epoch+1} | "
f"train_loss: {train_loss:.4f} | train_acc: {train_acc:.4f} | "
f"val_loss: {val_loss:.4f} | val_acc: {val_acc:.4f}"
)
results["train_loss"].append(train_loss)
results["train_acc"].append(train_acc)
results["val_loss"].append(val_loss)
results["val_acc"].append(val_acc)
return results
# --- Utility and Evaluation Functions ---
def save_model(model: nn.Module, save_path: str) -> None:
"""Saves the model's state dictionary to a file.
Args:
model: The PyTorch model to save.
save_path: The path where the model will be saved.
"""
logging.info(f"Saving model to: {save_path}")
torch.save(obj=model.state_dict(), f=save_path)
def plot_loss_curves(results: dict[str, list[float]]) -> None:
"""Plots training and validation loss and accuracy curves."""
plt.figure(figsize=(15, 7))
# Plot loss
plt.subplot(1, 2, 1)
plt.plot(results["train_loss"], label="train_loss")
plt.plot(results["val_loss"], label="val_loss")
plt.title("Loss Curves")
plt.xlabel("Epochs")
plt.legend()
# Plot accuracy
plt.subplot(1, 2, 2)
plt.plot(results["train_acc"], label="train_accuracy")
plt.plot(results["val_acc"], label="val_accuracy")
plt.title("Accuracy Curves")
plt.xlabel("Epochs")
plt.legend()
plt.show()
def evaluate_and_plot_predictions(
model: nn.Module,
test_dataloader: DataLoader,
class_names: list[str],
device: str
) -> None:
"""Makes predictions on random test samples and visualizes them."""
# Get random samples
test_samples, test_labels = [], []
    # Plot up to 9 samples; if the test set has fewer, use all of them
num_samples_to_plot = min(9, len(test_dataloader.dataset_unbatched))
for sample, label in random.sample(list(test_dataloader.dataset_unbatched), k=num_samples_to_plot):
test_samples.append(sample)
test_labels.append(label)
# Make predictions
model.eval()
pred_probs = []
with torch.inference_mode():
for sample in test_samples:
sample = sample.unsqueeze(0).to(device)
pred_logit = model(sample)
pred_prob = torch.softmax(pred_logit.squeeze(), dim=0)
pred_probs.append(pred_prob.cpu())
pred_classes = torch.stack(pred_probs).argmax(dim=1)
# Plot predictions
plt.figure(figsize=(14, 14))
plt.suptitle("Model Predictions vs. Truth", fontsize=16)
for i, sample in enumerate(test_samples):
plt.subplot(3, 3, i + 1)
# Tensors need to be on CPU and permuted for matplotlib
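        # The samples are already normalized by the EfficientNet transforms, so the
        # displayed colours will look shifted/clipped (matplotlib clamps the values).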
plt.imshow(sample.cpu().permute(1, 2, 0))
pred_label = class_names[pred_classes[i]]
truth_label = class_names[test_labels[i]]
title = f"Pred: {pred_label}\nTruth: {truth_label}"
plt.title(title, fontsize=9, c="g" if pred_label == truth_label else "r")
plt.axis(False)
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()
# --- Main Execution Block ---
def main():
"""Main function to orchestrate model training and evaluation."""
# Setup logging
logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
# Initialize configuration
config = TrainingConfig()
logging.info(f"Using device: {config.DEVICE}")
logging.info(f"Using seed: {config.SEED}")
# Set seeds for reproducibility
torch.manual_seed(config.SEED)
torch.cuda.manual_seed(config.SEED)
    # The pretrained weights bundle their own preprocessing transforms; build a
    # temporary model here just to retrieve them (the model is recreated below
    # with the correct number of classes).
dummy_num_classes = 10
_, auto_transforms = create_efficientnet_b0(dummy_num_classes, config)
logging.info(f"Applying transforms: {auto_transforms}")
# Create DataLoaders
train_dl, val_dl, test_dl, class_names = create_dataloaders(
data_dir=config.DATA_DIR, transform=auto_transforms, config=config
)
logging.info(f"Found {len(class_names)} classes.")
logging.info(f"Train batches: {len(train_dl)}, Val batches: {len(val_dl)}, Test batches: {len(test_dl)}")
# Re-create the model with the correct number of classes
model, _ = create_efficientnet_b0(len(class_names), config)
summary(
model,
input_size=(config.BATCH_SIZE, 3, 224, 224),
col_names=["input_size", "output_size", "num_params", "trainable"],
col_width=20,
row_settings=["var_names"],
)
# Train the model
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=config.LEARNING_RATE)
start_time = timer()
results = train(model, train_dl, val_dl, optimizer, loss_fn, config)
end_time = timer()
logging.info(f"Total training time: {end_time - start_time:.3f} seconds")
# Save the trained model
save_model(model=model, save_path=config.MODEL_SAVE_PATH)
# Evaluate the model with plots
plot_loss_curves(results)
evaluate_and_plot_predictions(model, test_dl, class_names, config.DEVICE)
if __name__ == "__main__":
main()
Here is the link to my Kaggle Notebook:
https://www.kaggle.com/code/steveaustin583/house-plant-identification
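Since I mentioned converting this into a Flask application, below is a rough, untested sketch of how I currently picture loading the saved weights for inference. Everything in it is an assumption rather than working code: the /predict endpoint, the multipart upload handling, and the empty CLASS_NAMES placeholder (the real class list would have to be exported from the notebook alongside the weights, e.g. as JSON).

# inference_app.py -- hypothetical sketch only, not part of the notebook above.
import io
import torch
from flask import Flask, jsonify, request
from PIL import Image
from torch import nn
from torchvision import models

MODEL_PATH = "house_plant_classifier_v1.pth"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Placeholder: must be filled with the exact class list from the training ImageFolder,
# e.g. loaded from a JSON file saved next to the weights.
CLASS_NAMES: list[str] = []


def load_model(num_classes: int):
    """Rebuild the same EfficientNet-B0 architecture as in training and load the weights."""
    weights = models.EfficientNet_B0_Weights.DEFAULT
    # No pretrained weights needed here: the saved state dict already contains
    # the (frozen) feature-extractor parameters from training.
    model = models.efficientnet_b0(weights=None)
    model.classifier = nn.Sequential(
        nn.Dropout(p=0.2),
        nn.Linear(in_features=1280, out_features=num_classes),
    )
    model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
    model.to(DEVICE).eval()
    return model, weights.transforms()


app = Flask(__name__)
model, preprocess = load_model(num_classes=len(CLASS_NAMES))


@app.route("/predict", methods=["POST"])
def predict():
    # Expect a multipart form upload with an "image" field.
    image = Image.open(io.BytesIO(request.files["image"].read())).convert("RGB")
    batch = preprocess(image).unsqueeze(0).to(DEVICE)
    with torch.inference_mode():
        probs = torch.softmax(model(batch), dim=1).squeeze(0)
    confidence, idx = probs.max(dim=0)
    return jsonify({"class": CLASS_NAMES[int(idx)], "confidence": float(confidence)})


if __name__ == "__main__":
    app.run(debug=True)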