I know an autoencoder (AE) can compress information and extract new features that represent the input data. I found a paper that used an AE to evaluate the importance of every feature in the original matrix. In the subsequent analysis, the authors used the selected features to build machine learning models such as random forest, XGBoost, and logistic regression.
I want to know how to use an AE to select features and obtain their weights, given that the encoder may have multiple layers, each with different weights. I used PyTorch to train the AE model on my matrix. The following code block is my PyTorch code.
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torch.utils.data as Data
import pandas as pd
# Load the tab-separated input matrix; the first column becomes the row index.
input_data = pd.read_csv(
    "D:/tcga/antoencoder/input_ae.txt",
    delimiter='\t',
    index_col=0,
)
class AutoEncoder(nn.Module):
    """Fully connected autoencoder with Tanh activations.

    The encoder is a stack of ``Linear`` layers separated by ``Tanh`` that
    narrows ``input_dim`` down to ``latent_dim``; its last layer is linear
    (no activation). The decoder mirrors the encoder widths in reverse and
    applies ``Tanh`` after every layer, including the last one, so
    reconstructions always lie in (-1, 1).

    Args:
        input_dim: Number of input features (default 428).
        hidden_dims: Widths of the encoder's hidden layers, in order
            (default ``(128, 64, 36, 18)``). The decoder uses them reversed.
        latent_dim: Width of the bottleneck code (default 9).

    With the default arguments the layer layout, and therefore the
    ``state_dict`` keys, are the same as the previously hard-coded
    428-128-64-36-18-9 network.
    """

    def __init__(self, input_dim=428, hidden_dims=(128, 64, 36, 18), latent_dim=9):
        super().__init__()
        widths = [input_dim, *hidden_dims, latent_dim]
        # Encoder is built before the decoder so parameter creation order
        # (and hence seeded initialisation) is unchanged.
        self.encoder = nn.Sequential(
            *self._build_layers(widths, activate_last=False)
        )
        self.decoder = nn.Sequential(
            *self._build_layers(widths[::-1], activate_last=True)
        )

    @staticmethod
    def _build_layers(widths, activate_last):
        """Return alternating Linear/Tanh modules for consecutive widths."""
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.Tanh())
        if not activate_last:
            # Drop the trailing Tanh so the final layer stays linear.
            layers.pop()
        return layers

    def forward(self, x):
        """Return ``(encoded, decoded)`` for a batch ``x``.

        ``x`` must have ``input_dim`` features in its last dimension.
        """
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return encoded, decoded
# Training hyperparameters.
BATCH_SIZE = 450
LR = 0.005
EPOCH = 100

# `tensor2` was used below without being defined anywhere in the visible
# script (NameError). Build it from the DataFrame loaded at the top.
# NOTE(review): assumes every column of input_data is numeric and that the
# column count matches the autoencoder's input width — confirm.
# NOTE(review): the decoder ends in Tanh, so targets outside (-1, 1) cannot
# be reconstructed; presumably the data should be scaled first — confirm.
tensor2 = torch.tensor(input_data.to_numpy(dtype="float32"))
if tensor2.size(0) == 0:
    raise ValueError("input_data contains no rows; nothing to train on")

loss_func = nn.MSELoss()
autoencoder = AutoEncoder()
optimizer = torch.optim.Adam(autoencoder.parameters(), lr=LR)

for epoch in range(EPOCH):
    # Sequential (unshuffled) mini-batches; slicing clamps at the end of
    # the tensor, so the last batch may be shorter than BATCH_SIZE.
    for i in range(0, tensor2.size(0), BATCH_SIZE):
        b_x = tensor2[i:i + BATCH_SIZE]
        b_y = b_x  # reconstruction target is the input itself

        encoded, decoded = autoencoder(b_x)
        loss = loss_func(decoded, b_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Report the loss of the last mini-batch every fifth epoch.
    if epoch % 5 == 0:
        print('Epoch: ', epoch, '| train loss: %.4f' % loss.item())