The saved keras model cannot be loaded. #19899

Open
alkaou opened this issue Jun 21, 2024 · 6 comments
Labels
type:support User is asking for help / asking an implementation question. Stackoverflow would be better suited.

Comments


alkaou commented Jun 21, 2024

After training, I saved my model, but I can't load it back. I tried everything, but it always gives me a custom_objects error.

I based my code on the miniature GPT example in the Keras docs.

Code:

import os

os.environ["KERAS_BACKEND"] = "tensorflow"

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import ops
from tensorflow.keras.layers import TextVectorization
import numpy as np
import os
import string
import random
import tensorflow
import tensorflow.data as tf_data
import tensorflow.strings as tf_strings
def causal_attention_mask(batch_size, n_dest, n_src, dtype):
    """
    Mask the upper half of the dot product matrix in self attention.
    This prevents flow of information from future tokens to current token.
    1's in the lower triangle, counting from the lower right corner.
    """
    i = ops.arange(n_dest)[:, None]
    j = ops.arange(n_src)
    m = i >= j - n_src + n_dest
    mask = ops.cast(m, dtype)
    mask = ops.reshape(mask, [1, n_dest, n_src])
    mult = ops.concatenate(
        [ops.expand_dims(batch_size, -1), ops.convert_to_tensor([1, 1])], 0
    )
    return ops.tile(mask, mult)


class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        self.att = layers.MultiHeadAttention(num_heads, embed_dim)
        self.ffn = keras.Sequential(
            [
                layers.Dense(ff_dim, activation="relu"),
                layers.Dense(embed_dim),
            ]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs):
        input_shape = ops.shape(inputs)
        batch_size = input_shape[0]
        seq_len = input_shape[1]
        causal_mask = causal_attention_mask(batch_size, seq_len, seq_len, "bool")
        attention_output = self.att(inputs, inputs, attention_mask=causal_mask)
        attention_output = self.dropout1(attention_output)
        out1 = self.layernorm1(inputs + attention_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output)
        return self.layernorm2(out1 + ffn_output)
class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super().__init__()
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = ops.shape(x)[-1]
        positions = ops.arange(0, maxlen, 1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions
vocab_size = 20000  # Only consider the top 20k words
maxlen = 80  # Max sequence size
embed_dim = 256  # Embedding size for each token
num_heads = 4  # Number of attention heads
feed_forward_dim = 256  # Hidden layer size in feed forward network inside transformer


def create_model():
    inputs = layers.Input(shape=(maxlen,), dtype="int32")
    embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
    x = embedding_layer(inputs)
    transformer_block = TransformerBlock(embed_dim, num_heads, feed_forward_dim)
    x = transformer_block(x)
    outputs = layers.Dense(vocab_size)(x)
    model = keras.Model(inputs=inputs, outputs=[outputs, x])
    loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(
        "Adam",
        loss=[loss_fn, None],
    )  # No loss and optimization based on word embeddings from transformer block
    return model
batch_size = 128

# The dataset contains each review in a separate text file
# The text files are present in four different folders
# Create a list of all files
filenames = [
    "datasets/data_1.txt",
    "datasets/data_2.txt",
    "datasets/data_3.txt",
    "datasets/data_4.txt",
]
# directories = [
#     "aclImdb/train/pos",
#     "aclImdb/train/neg",
#     "aclImdb/test/pos",
#     "aclImdb/test/neg",
# ]
# for dir in directories:
#     for f in os.listdir(dir):
#         filenames.append(os.path.join(dir, f))

print(f"{len(filenames)} files")

# Create a dataset from text files
random.shuffle(filenames)
text_ds = tf_data.TextLineDataset(filenames)
text_ds = text_ds.shuffle(buffer_size=256)
text_ds = text_ds.batch(batch_size)
# print(dir(text_ds))
def custom_standardization(input_string):
    """Remove html line-break tags and handle punctuation"""
    lowercased = tf_strings.lower(input_string)
    stripped_html = tf_strings.regex_replace(lowercased, "<br />", " ")
    return tf_strings.regex_replace(stripped_html, f"([{string.punctuation}])", r" \1")


# Create a vectorization layer and adapt it to the text
vectorize_layer = TextVectorization(
    standardize=custom_standardization,
    max_tokens=vocab_size - 1,
    output_mode="int",
    output_sequence_length=maxlen + 1,
)
vectorize_layer.adapt(text_ds)
vocab = vectorize_layer.get_vocabulary()  # To get words back from token indices


def prepare_lm_inputs_labels(text):
    """
    Shift word sequences by 1 position so that the target for position (i) is
    word at position (i+1). The model will use all words up till position (i)
    to predict the next word.
    """
    text = tensorflow.expand_dims(text, -1)
    tokenized_sentences = vectorize_layer(text)
    x = tokenized_sentences[:, :-1]
    y = tokenized_sentences[:, 1:]
    print(tokenized_sentences)
    print(y)
    return x, y

text_ds = text_ds.map(prepare_lm_inputs_labels, num_parallel_calls=tf_data.AUTOTUNE)
text_ds = text_ds.prefetch(tf_data.AUTOTUNE)
class TextGenerator(keras.callbacks.Callback):
    """A callback to generate text from a trained model.
    1. Feed some starting prompt to the model
    2. Predict probabilities for the next token
    3. Sample the next token and add it to the next input

    Arguments:
        max_tokens: Integer, the number of tokens to be generated after prompt.
        start_tokens: List of integers, the token indices for the starting prompt.
        index_to_word: List of strings, obtained from the TextVectorization layer.
        top_k: Integer, sample from the `top_k` token predictions.
        print_every: Integer, print after this many epochs.
    """

    def __init__(
        self, max_tokens, start_tokens, index_to_word, top_k=10, print_every=1
    ):
        self.max_tokens = max_tokens
        self.start_tokens = start_tokens
        self.index_to_word = index_to_word
        self.print_every = print_every
        self.k = top_k

    def sample_from(self, logits):
        logits, indices = ops.top_k(logits, k=self.k, sorted=True)
        indices = np.asarray(indices).astype("int32")
        preds = keras.activations.softmax(ops.expand_dims(logits, 0))[0]
        preds = np.asarray(preds).astype("float32")
        return np.random.choice(indices, p=preds)

    def detokenize(self, number):
        return self.index_to_word[number]

    def on_epoch_end(self, epoch, logs=None):
        start_tokens = [_ for _ in self.start_tokens]
        if (epoch + 1) % self.print_every != 0:
            return
        num_tokens_generated = 0
        tokens_generated = []
        while num_tokens_generated <= self.max_tokens:
            pad_len = maxlen - len(start_tokens)
            sample_index = len(start_tokens) - 1
            if pad_len < 0:
                x = start_tokens[:maxlen]
                sample_index = maxlen - 1
            elif pad_len > 0:
                x = start_tokens + [0] * pad_len
            else:
                x = start_tokens
            x = np.array([x])
            y, _ = self.model.predict(x, verbose=0)
            sample_token = self.sample_from(y[0][sample_index])
            tokens_generated.append(sample_token)
            start_tokens.append(sample_token)
            num_tokens_generated = len(tokens_generated)
        txt = " ".join(
            [self.detokenize(_) for _ in self.start_tokens + tokens_generated]
        )
        print(f"generated text:\n{txt}\n")


# Tokenize starting prompt
word_to_index = {}
for index, word in enumerate(vocab):
    word_to_index[word] = index

start_prompt = "what is keras ?"
start_tokens = [word_to_index.get(_, 1) for _ in start_prompt.split()]
num_tokens_generated = 40
text_gen_callback = TextGenerator(num_tokens_generated, start_tokens, vocab)
max_epoch = 25  # number of training epochs
model = create_model()
model.fit(text_ds, verbose=2, epochs=max_epoch, callbacks=[text_gen_callback])
model.save("model.keras")

### Now, when I try to load my model:

from tensorflow.keras import models
br_model = models.load_model("model.keras")
br_model.summary()

I'm getting these errors:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
File E:\Alkaou\Python Projects\ia_code\venv\lib\site-packages\keras\src\saving\serialization_lib.py:718, in deserialize_keras_object(config, custom_objects, safe_mode, **kwargs)
    717 try:
--> 718     instance = cls.from_config(inner_config)
    719 except TypeError as e:

File E:\Alkaou\Python Projects\ia_code\venv\lib\site-packages\keras\src\models\model.py:517, in Model.from_config(cls, config, custom_objects)
    515     from keras.src.models.functional import functional_from_config
--> 517     return functional_from_config(
    518         cls, config, custom_objects=custom_objects
    519     )
    521 # Either the model has a custom __init__, or the config
    522 # does not contain all the information necessary to
    523 # revive a Functional model. This happens when the user creates
   (...)
    526 # In this case, we fall back to provide all config into the
    527 # constructor of the class.

File E:\Alkaou\Python Projects\ia_code\venv\lib\site-packages\keras\src\models\functional.py:517, in functional_from_config(cls, config, custom_objects)
    516 for layer_data in config["layers"]:
--> 517     process_layer(layer_data)
    519 # Then we process nodes in order of layer depth.
    520 # Nodes that cannot yet be processed (if the inbound node
    521 # does not yet exist) are re-enqueued, and the process
    522 # is repeated until all nodes are processed.

File E:\Alkaou\Python Projects\ia_code\venv\lib\site-packages\keras\src\models\functional.py:501, in functional_from_config.<locals>.process_layer(layer_data)
    500 else:
--> 501     layer = serialization_lib.deserialize_keras_object(
    502         layer_data, custom_objects=custom_objects
    503     )
    504 created_layers[layer_name] = layer

File E:\Alkaou\Python Projects\ia_code\venv\lib\site-packages\keras\src\saving\serialization_lib.py:694, in deserialize_keras_object(config, custom_objects, safe_mode, **kwargs)
    692         return obj
--> 694 cls = _retrieve_class_or_fn(
    695     class_name,
    696     registered_name,
    697     module,
    698     obj_type="class",
    699     full_config=config,
    700     custom_objects=custom_objects,
    701 )
    703 if isinstance(cls, types.FunctionType):

File E:\Alkaou\Python Projects\ia_code\venv\lib\site-packages\keras\src\saving\serialization_lib.py:812, in _retrieve_class_or_fn(name, registered_name, module, obj_type, full_config, custom_objects)
    810         return obj
--> 812 raise TypeError(
    813     f"Could not locate {obj_type} '{name}'. "
    814     "Make sure custom classes are decorated with "
    815     "`@keras.saving.register_keras_serializable()`. "
    816     f"Full object config: {full_config}"
    817 )

TypeError: Could not locate class 'TokenAndPositionEmbedding'. Make sure custom classes are decorated with `@keras.saving.register_keras_serializable()`. Full object config: {'module': None, 'class_name': 'TokenAndPositionEmbedding', 'config': {'maxlen': 80, 'vocab_size': 20000, 'embed_dim': 256, 'trainable': True, 'dtype': 'float32'}, 'registered_name': 'TokenAndPositionEmbedding', 'build_config': {'input_shape': [None, 80]}, 'name': 'token_and_position_embedding_11', 'inbound_nodes': [{'args': [{'class_name': '__keras_tensor__', 'config': {'shape': [None, 80], 'dtype': 'int32', 'keras_history': ['input_layer_8', 0, 0]}}], 'kwargs': {}}]}

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
Cell In[2], line 5
      1 # from tensorflow.keras.utils import custom_object_scope
      2 # from tensorflow.keras.utils import get_custom_objects
      3 from tensorflow.keras import models
----> 5 br_model = models.load_model("E:\\Alkaou\Python Projects\\models\\br_model.keras")
      7 br_model.summary()

File E:\Alkaou\Python Projects\ia_code\venv\lib\site-packages\keras\src\saving\saving_api.py:176, in load_model(filepath, custom_objects, compile, safe_mode)
    173         is_keras_zip = True
    175 if is_keras_zip:
--> 176     return saving_lib.load_model(
    177         filepath,
    178         custom_objects=custom_objects,
    179         compile=compile,
    180         safe_mode=safe_mode,
    181     )
    182 if str(filepath).endswith((".h5", ".hdf5")):
    183     return legacy_h5_format.load_model_from_hdf5(
    184         filepath, custom_objects=custom_objects, compile=compile
    185     )

File E:\Alkaou\Python Projects\ia_code\venv\lib\site-packages\keras\src\saving\saving_lib.py:152, in load_model(filepath, custom_objects, compile, safe_mode)
    147     raise ValueError(
    148         "Invalid filename: expected a `.keras` extension. "
    149         f"Received: filepath={filepath}"
    150     )
    151 with open(filepath, "rb") as f:
--> 152     return _load_model_from_fileobj(
    153         f, custom_objects, compile, safe_mode
    154     )

File E:\Alkaou\Python Projects\ia_code\venv\lib\site-packages\keras\src\saving\saving_lib.py:170, in _load_model_from_fileobj(fileobj, custom_objects, compile, safe_mode)
    168 # Construct the model from the configuration file in the archive.
    169 with ObjectSharingScope():
--> 170     model = deserialize_keras_object(
    171         config_dict, custom_objects, safe_mode=safe_mode
    172     )
    174 all_filenames = zf.namelist()
    175 if _VARS_FNAME + ".h5" in all_filenames:

File E:\Alkaou\Python Projects\ia_code\venv\lib\site-packages\keras\src\saving\serialization_lib.py:720, in deserialize_keras_object(config, custom_objects, safe_mode, **kwargs)
    718     instance = cls.from_config(inner_config)
    719 except TypeError as e:
--> 720     raise TypeError(
    721         f"{cls} could not be deserialized properly. Please"
    722         " ensure that components that are Python object"
    723         " instances (layers, models, etc.) returned by"
    724         " `get_config()` are explicitly deserialized in the"
    725         " model's `from_config()` method."
    726         f"\n\nconfig={config}.\n\nException encountered: {e}"
    727     )
    728 build_config = config.get("build_config", None)
    729 if build_config and not instance.built:

TypeError: <class 'keras.src.models.functional.Functional'> could not be deserialized properly. Please ensure that components that are Python object instances (layers, models, etc.) returned by `get_config()` are explicitly deserialized in the model's `from_config()` method.

config={'module': 'keras.src.models.functional', 'class_name': 'Functional', 'config': {'name': 'functional_11', 'trainable': True, 'layers': [{'module': 'keras.layers', 'class_name': 'InputLayer', 'config': {'batch_shape': [None, 80], 'dtype': 'int32', 'sparse': False, 'name': 'input_layer_8'}, 'registered_name': None, 'name': 'input_layer_8', 'inbound_nodes': []}, {'module': None, 'class_name': 'TokenAndPositionEmbedding', 'config': {'maxlen': 80, 'vocab_size': 20000, 'embed_dim': 256, 'trainable': True, 'dtype': 'float32'}, 'registered_name': 'TokenAndPositionEmbedding', 'build_config': {'input_shape': [None, 80]}, 'name': 'token_and_position_embedding_11', 'inbound_nodes': [{'args': [{'class_name': '__keras_tensor__', 'config': {'shape': [None, 80], 'dtype': 'int32', 'keras_history': ['input_layer_8', 0, 0]}}], 'kwargs': {}}]}, {'module': None, 'class_name': 'TransformerBlock', 'config': {'embed_dim': 256, 'num_heads': 2, 'ff_dim': 256, 'trainable': True, 'dtype': 'float32'}, 'registered_name': 'TransformerBlock', 'build_config': {'input_shape': [None, 80, 256]}, 'name': 'transformer_block_3', 'inbound_nodes': [{'args': [{'class_name': '__keras_tensor__', 'config': {'shape': [None, 80, 256], 'dtype': 'float32', 'keras_history': ['token_and_position_embedding_11', 0, 0]}}], 'kwargs': {}}]}, {'module': 'keras.layers', 'class_name': 'Dense', 'config': {'name': 'dense_11', 'trainable': True, 'dtype': 'float32', 'units': 20000, 'activation': 'linear', 'use_bias': True, 'kernel_initializer': {'module': 'keras.initializers', 'class_name': 'GlorotUniform', 'config': {'seed': None}, 'registered_name': None}, 'bias_initializer': {'module': 'keras.initializers', 'class_name': 'Zeros', 'config': {}, 'registered_name': None}, 'kernel_regularizer': None, 'bias_regularizer': None, 'kernel_constraint': None, 'bias_constraint': None}, 'registered_name': None, 'build_config': {'input_shape': [None, 80, 256]}, 'name': 'dense_11', 'inbound_nodes': [{'args': [{'class_name': '__keras_tensor__', 'config': {'shape': [None, 80, 256], 'dtype': 'float32', 'keras_history': ['transformer_block_3', 0, 0]}}], 'kwargs': {}}]}], 'input_layers': [['input_layer_8', 0, 0]], 'output_layers': [['dense_11', 0, 0], ['transformer_block_3', 0, 0]]}, 'registered_name': 'Functional', 'build_config': {'input_shape': None}, 'compile_config': {'optimizer': 'Adam', 'loss': [{'module': 'keras.losses', 'class_name': 'SparseCategoricalCrossentropy', 'config': {'name': 'sparse_categorical_crossentropy', 'reduction': 'sum_over_batch_size', 'from_logits': True, 'ignore_class': None}, 'registered_name': None}, None], 'loss_weights': None, 'metrics': None, 'weighted_metrics': None, 'run_eagerly': False, 'steps_per_execution': 1, 'jit_compile': False}}.

Exception encountered: Could not locate class 'TokenAndPositionEmbedding'. Make sure custom classes are decorated with `@keras.saving.register_keras_serializable()`. Full object config: {'module': None, 'class_name': 'TokenAndPositionEmbedding', 'config': {'maxlen': 80, 'vocab_size': 20000, 'embed_dim': 256, 'trainable': True, 'dtype': 'float32'}, 'registered_name': 'TokenAndPositionEmbedding', 'build_config': {'input_shape': [None, 80]}, 'name': 'token_and_position_embedding_11', 'inbound_nodes': [{'args': [{'class_name': '__keras_tensor__', 'config': {'shape': [None, 80], 'dtype': 'int32', 'keras_history': ['input_layer_8', 0, 0]}}], 'kwargs': {}}]}

Versions:

Python 3.10.10
tensorflow==2.16.1
keras==3.3.3
@sachinprasadhs
Collaborator

You can follow the instructions in the error message to register the custom objects.

Here are the multiple ways of doing it: https://keras.io/guides/serialization_and_saving/#custom-objects
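
For concreteness, a minimal sketch of the decorator route from that guide, using a toy layer (the package name and the ScaleLayer class are illustrative and not part of this issue; here the same decorator would go on TokenAndPositionEmbedding and TransformerBlock before the model is saved):

from tensorflow import keras
from tensorflow.keras import layers

@keras.saving.register_keras_serializable(package="Custom")
class ScaleLayer(layers.Layer):
    """Toy custom layer: multiplies its input by a fixed factor."""

    def __init__(self, factor, **kwargs):
        super().__init__(**kwargs)
        self.factor = factor

    def call(self, x):
        return x * self.factor

    def get_config(self):
        config = super().get_config()
        config.update({"factor": self.factor})
        return config

inputs = keras.Input(shape=(4,))
model = keras.Model(inputs, ScaleLayer(2.0)(inputs))
model.save("scale_demo.keras")

# Because the class was registered before saving (and this module is imported
# before loading), load_model resolves it without custom_objects.
reloaded = keras.models.load_model("scale_demo.keras")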

sachinprasadhs added the type:support and stat:awaiting response from contributor labels on Jun 21, 2024

alkaou commented Jun 22, 2024

> You can follow the instructions in the error message to register the custom objects.
>
> Here are the multiple ways of doing it: https://keras.io/guides/serialization_and_saving/#custom-objects

I've tried everything possible, but I still can't load my saved model. I made a Git repository; please take a look at my code to help me.

The code is here:
https://github.com/alkaou/GenIA_LLM.git


h4ck4l1 commented Jun 23, 2024

You are writing custom Keras layers, but Keras passes its own arguments (such as trainable) into its layers, so you should accept **kwargs in all your custom layers and models and forward them to super(), because you don't know which arguments Keras will pass.

Either you pass the arguments used by the custom layers, which are defined outside create_model(), like these:

vocab_size = 20000  # Only consider the top 20k words
maxlen = 80  # Max sequence size
embed_dim = 256  # Embedding size for each token
num_heads = 4  # Number of attention heads
feed_forward_dim = 256  # Hidden layer size in feed forward network inside transformer

to custom_objects in the load_model call, or you can provide them through the layer config.

Method 1:

class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):  # accept **kwargs for the base-layer arguments Keras passes (name, trainable, dtype, ...)
        super().__init__(**kwargs)  # forward them to super() so they reach the original keras.layers.Layer
        self.att = layers.MultiHeadAttention(num_heads, embed_dim)
        self.ffn = keras.Sequential(
            [
                layers.Dense(ff_dim, activation="relu"),
                layers.Dense(embed_dim),
            ]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs):
        input_shape = ops.shape(inputs)
        batch_size = input_shape[0]
        seq_len = input_shape[1]
        causal_mask = causal_attention_mask(batch_size, seq_len, seq_len, "bool")
        attention_output = self.att(inputs, inputs, attention_mask=causal_mask)
        attention_output = self.dropout1(attention_output)
        out1 = self.layernorm1(inputs + attention_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output)
        return self.layernorm2(out1 + ffn_output)

# same with TokenAndPositionEmbedding

class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim,**kwargs):
        super().__init__(**kwargs)
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = ops.shape(x)[-1]
        positions = ops.arange(0, maxlen, 1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

Then, while loading, you have to pass all of those arguments into custom_objects in load_model:

keras.models.load_model(
    "br_model.keras",
    custom_objects={
        "vocab_size":20000,
        "maxlen":80,
        "embed_dim":256,
        "num_heads":4,
        "feed_forward_dim":256,
        "TokenAndPositionEmbedding":TokenAndPositionEmbedding,
        "TransformerBlock":TransformerBlock
        }
)

Or you could do this and avoid passing them in custom_objects:

class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1,**kwargs):
        super().__init__(**kwargs)
        self.att = layers.MultiHeadAttention(num_heads, embed_dim)
        self.ffn = keras.Sequential(
            [
                layers.Dense(ff_dim, activation="relu"),
                layers.Dense(embed_dim),
            ]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs):
        input_shape = ops.shape(inputs)
        batch_size = input_shape[0]
        seq_len = input_shape[1]
        causal_mask = causal_attention_mask(batch_size, seq_len, seq_len, "bool")
        attention_output = self.att(inputs, inputs, attention_mask=causal_mask)
        attention_output = self.dropout1(attention_output)
        out1 = self.layernorm1(inputs + attention_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output)
        return self.layernorm2(out1 + ffn_output)


    def get_config(self):
        config = super().get_config().copy()
        config.update(
            {
                "embed_dim":self.att.key_dim,
                "num_heads": self.at.num_heads,
                "ff_dim": self.ffn.layers[0].units,
                "rate": self.dropout1.rate,
            }
        )
        return config

    @classmethod
    def from_config(cls,config):
        return cls(**config)

class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim,**kwargs):
        super().__init__(**kwargs)
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = ops.shape(x)[-1]
        positions = ops.arange(0, maxlen, 1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

    def get_config(self):
        config = super().get_config().copy()
        config.update(
            {
                "maxlen": self.pos_emb.input_dim,
                "vocab_size": self.token_emb.input_dim,
                "embed_dim": self.token_emb.output_dim
            }
        )
        return config

    @classmethod
    def from_config(cls,config):
        return cls(**config)

And when loading the model:

keras.models.load_model(
    "br_model.keras",
    custom_objects={
        "TokenAndPositionEmbedding":TokenAndPositionEmbedding,
        "TransformerBlock":TransformerBlock
        }
)

See whether this solves it
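
As a quick sanity check of the round trip (a sketch, assuming the fixed TransformerBlock, TokenAndPositionEmbedding, and causal_attention_mask definitions above are in scope; the file name is arbitrary):

import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

maxlen, vocab_size, embed_dim, num_heads, ff_dim = 80, 20000, 256, 4, 256

# Build a small model with the custom layers, save it, and load it back.
inputs = layers.Input(shape=(maxlen,), dtype="int32")
x = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)(inputs)
x = TransformerBlock(embed_dim, num_heads, ff_dim)(x)
outputs = layers.Dense(vocab_size)(x)
model = keras.Model(inputs, [outputs, x])
model.save("roundtrip_check.keras")

reloaded = keras.models.load_model(
    "roundtrip_check.keras",
    custom_objects={
        "TokenAndPositionEmbedding": TokenAndPositionEmbedding,
        "TransformerBlock": TransformerBlock,
    },
)

# Identical predictions on a dummy batch confirm weights and config survived.
dummy = np.zeros((1, maxlen), dtype="int32")
ref, _ = model.predict(dummy, verbose=0)
new, _ = reloaded.predict(dummy, verbose=0)
np.testing.assert_allclose(ref, new, atol=1e-5)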


alkaou commented Jun 24, 2024

Thank you very much. It's working.


h4ck4l1 commented Jun 24, 2024

Brother, please close this as completed. Thank you.


emi-dm commented Jun 27, 2024

Is it possible to use the Keras register_keras_serializable decorator here and avoid using the custom object scope?
