
I was asked to create an autoencoder that reconstructs a binary CSV file, i.e., the decoder should reproduce the binary input.

I implemented one based on the MNIST example from GeeksforGeeks, but I am uncertain about its correctness, in particular the loss calculation and the ReLU/Linear layers. From my research, it seems BCELoss is better suited than MSELoss for binary data like this.
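If I understand correctly, switching to BCELoss would be a one-line change here, since the decoder already ends in a Sigmoid and BCELoss expects probabilities against 0/1 float targets. A standalone sketch of what I mean (the 4×10 shape is just a placeholder, not my real data):

import torch
import torch.nn as nn

# Standalone sketch: BCELoss on sigmoid outputs against 0/1 float targets
criterion = nn.BCELoss()
probs = torch.sigmoid(torch.randn(4, 10))       # stand-in for decoder output
targets = torch.randint(0, 2, (4, 10)).float()  # stand-in for binary data
print(criterion(probs, targets))                # scalar loss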

Any suggestions?

The following code runs and produces output, but the loss is extremely small. What batch size, hidden dimension, and number of epochs would you recommend?
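For what it's worth, this is how I was planning to compare settings, reusing the Autoencoder class, data_tensor, D, and K from the code below; the candidate values are my own guesses, not recommendations I found anywhere:

import itertools

# Brute-force sweep over guessed candidate values (not recommendations)
for hidden_dim, batch_size, num_epochs in itertools.product(
        [2, 5, 8], [5, 10, 25], [50, 100]):
    model = Autoencoder(input_dim=K, hidden_dim=hidden_dim)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.MSELoss()
    for epoch in range(num_epochs):
        for i in range(0, D, batch_size):
            batch = data_tensor[i:i+batch_size]
            loss = criterion(model(batch), batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    print(hidden_dim, batch_size, num_epochs, loss.item())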


import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

# Set random seed for reproducibility
np.random.seed(42)
torch.manual_seed(42)

# Generate toy data: D = 100 patients, K = 10 phenotypes, binary values (0 or 1)
# (in my real code, dm is the binary matrix loaded from the CSV file)
dm = np.random.randint(0, 2, size=(100, 10)).astype(np.float32)
D, K = dm.shape

# Convert the numpy array to a float PyTorch tensor
data_tensor = torch.tensor(dm, dtype=torch.float32)
print(data_tensor.shape)

# Define the Autoencoder model
class Autoencoder(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(Autoencoder, self).__init__()
        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU()
        )
        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(hidden_dim, input_dim),
            nn.Sigmoid()
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

# Hyperparameters
input_dim = K
hidden_dim = 5  # Hidden layer dimension; adjust as needed

# Initialize the model, loss function, and optimizer
model = Autoencoder(input_dim=input_dim, hidden_dim=hidden_dim)
criterion = nn.MSELoss()  # MSE for now; BCELoss is the alternative I asked about
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training parameters
num_epochs = 5
batch_size = 10

# Training loop
for epoch in range(num_epochs):
    for i in range(0, D, batch_size):
        batch_data = data_tensor[i:i+batch_size]

        # Forward pass
        outputs = model(batch_data)
        loss = criterion(outputs, batch_data)

        # Backward pass and optimization
        optimizer.zero_grad()  
        loss.backward()
        optimizer.step()

    # Report the loss once per epoch (with num_epochs = 5, a "% 10" check never fires)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


# Testing the model by reconstructing the input data
with torch.no_grad():
    reconstructed = model(data_tensor)

print("Original Data:")
print(data_tensor)

print("Reconstructed Data:")
print(reconstructed)
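
To sanity-check whether the very small loss actually means good reconstructions, I thought about thresholding the sigmoid outputs at 0.5 and counting exactly recovered bits (continuing from the tensors above):

# Threshold the sigmoid outputs at 0.5 and measure per-bit accuracy
binary_recon = (reconstructed > 0.5).float()
accuracy = (binary_recon == data_tensor).float().mean().item()
print(f"Per-bit reconstruction accuracy: {accuracy:.2%}")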
