I am learning to use the TensorFlow Recommenders library and the subclassing API and have been working through the documentation. I would like to know how to run and review the output of the individual sub-models that feed into the final model.
I have recreated an excerpt of the first sub model (I have compiled and fit the full model and it runs okay).
I would like to run this UserFeaturesModel in isolation, to be able to inspect the concatenated data that it outputs so that I can determine if it is doing what it should, what its shape is, and to understand more about how it feeds into the next sub model.
However, I am not sure how to compile and fit this model: compiling asks for a loss function, but isn't this model simply computing embeddings, and therefore without a loss function of its own?
Apologies if this is a stupid question, but I have spent days trying to figure this out, and have looked through previous questions on Stack Overflow and watched many tutorials. Please could someone show me how to run this model and generate an output that I can review?
# imports
import tensorflow as tf
import tensorflow_recommenders as tfrs
import tensorflow_datasets as tfds
import numpy as np
# load the MovieLens 100k ratings and movie catalogue from TensorFlow Datasets
ratings = tfds.load("movielens/100k-ratings", split="train")
movies = tfds.load("movielens/100k-movies", split="train")


def _select_rating_features(x):
    """Return only the rating fields that the rest of the script uses."""
    return {
        "movie_title": x["movie_title"],
        "user_id": x["user_id"],
        "user_rating": x["user_rating"],
        "timestamp": x["timestamp"],
        "raw_user_age": x["raw_user_age"],
    }


def _select_movie_title(x):
    """Return just the title of a movie record."""
    return x["movie_title"]


# keep required feature columns for each dataset
ratings = ratings.map(_select_rating_features)
movies = movies.map(_select_movie_title)
def _collect(dataset):
    """Concatenate every batch of *dataset* into a single NumPy array."""
    return np.concatenate(list(dataset))


# batched views of the two string features that need a vocabulary
user_ids = ratings.batch(1000).map(lambda batch: batch["user_id"])
movie_titles = movies.batch(1000)

# vocabularies: the distinct values seen in each feature
unique_user_ids = np.unique(_collect(user_ids))
unique_movie_titles = np.unique(_collect(movie_titles))

# every timestamp, used both to place bucket boundaries and to adapt
# the normalization layer
timestamps = _collect(ratings.map(lambda x: x["timestamp"]).batch(100))
max_timestamp = timestamps.max()
min_timestamp = timestamps.min()
# 2000 evenly spaced boundaries spanning the observed timestamp range
timestamp_buckets = np.linspace(min_timestamp, max_timestamp, num=2000)

# every user age, used to adapt the age normalization layer
user_age = _collect(ratings.map(lambda x: x["raw_user_age"]).batch(100))

# embedding table sizes: one row per vocabulary entry / boundary, plus one
# extra row (out-of-vocabulary index for the lookups, final bucket for the
# discretized timestamps)
user_vocab_size = unique_user_ids.size + 1
movie_vocab_size = unique_movie_titles.size + 1
timestamp_buckets_size = timestamp_buckets.size + 1
# fix the global seed before any dataset operation that draws randomness
tf.random.set_seed(42)

# sizes of the 80:20 train/test split and the batch size used for both
_NUM_TRAIN = 80_000
_NUM_TEST = 20_000
_BATCH_SIZE = 512

# shuffle once with a fixed order so that take/skip below see the same
# sequence and the two splits do not overlap
shuffled_data = ratings.shuffle(
    100_000, seed=42, reshuffle_each_iteration=False
)
train = shuffled_data.take(_NUM_TRAIN)
test = shuffled_data.skip(_NUM_TRAIN).take(_NUM_TEST)

# batch and cache the train and test datasets
cached_train = train.shuffle(_NUM_TRAIN).batch(_BATCH_SIZE).cache()
cached_test = test.batch(_BATCH_SIZE).cache()
# define model
class UserFeaturesModel(tf.keras.Model):
    """Turn raw user features into a single concatenated feature tensor.

    The model combines four pieces, in this order along axis 1:

    * an embedding of ``user_id`` (width 32),
    * an embedding of the bucketized ``timestamp`` (width 32),
    * the normalized ``timestamp`` as one column,
    * the normalized ``raw_user_age`` as one column.

    It relies on the module-level ``unique_user_ids``, ``user_vocab_size``,
    ``timestamp_buckets``, ``timestamp_buckets_size``, ``timestamps`` and
    ``user_age`` values being defined before it is instantiated.
    """

    def __init__(self):
        """Build the lookup, embedding and normalization layers."""
        super().__init__()
        dim = 32

        # user_id feature embeddings: string -> integer index -> vector
        self.user_id_embeddings = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_user_ids, mask_token=None
            ),
            tf.keras.layers.Embedding(user_vocab_size, dim),
        ])

        # timestamp bucket feature embeddings: value -> bucket index -> vector
        self.timestamp_bucket_embeddings = tf.keras.Sequential([
            tf.keras.layers.Discretization(timestamp_buckets.tolist()),
            tf.keras.layers.Embedding(timestamp_buckets_size, dim),
        ])

        # timestamp normalization, statistics learned from all timestamps
        self.normalized_timestamp = tf.keras.layers.Normalization(axis=None)
        self.normalized_timestamp.adapt(timestamps)

        # age normalization, statistics learned from all user ages
        self.normalized_user_age = tf.keras.layers.Normalization(axis=None)
        self.normalized_user_age.adapt(user_age)

    def call(self, inputs):
        """Run the forward pass.

        Args:
            inputs: mapping that provides the ``'user_id'``, ``'timestamp'``
                and ``'raw_user_age'`` features.

        Returns:
            The four feature pieces concatenated along axis 1.
        """
        timestamp = inputs['timestamp']

        id_vectors = self.user_id_embeddings(inputs['user_id'])
        bucket_vectors = self.timestamp_bucket_embeddings(timestamp)
        # the normalized scalars are reshaped into single-column matrices so
        # they can be concatenated next to the embedding matrices
        scaled_timestamp = tf.reshape(
            self.normalized_timestamp(timestamp), (-1, 1)
        )
        scaled_age = tf.reshape(
            self.normalized_user_age(inputs['raw_user_age']), (-1, 1)
        )

        pieces = [id_vectors, bucket_vectors, scaled_timestamp, scaled_age]
        return tf.concat(pieces, axis=1)
# instantiate model
model = UserFeaturesModel()
# run model - no compile() or fit() is needed just to inspect the output:
# a Keras model is callable, so passing it one batch of features runs the
# forward pass defined in call() and returns the concatenated tensor.
# A loss is only required when the model is trained with fit().
sample_batch = next(iter(ratings.batch(5)))
user_features = model(sample_batch)
# expected shape is (5, 66): two 32-wide embeddings plus the two
# single-column normalized features
print(user_features.shape)
print(user_features)