Model Structure

import torch
import torch.nn as nn
from mamba_ssm import Mamba  # Mamba block from the mamba-ssm package


class MambaComp(nn.Module):
    def __init__(
        self,
        enc_in: int,
        c_out: int,
        e_layers: int,
        noise_level: float,
        d_model: int,
        d_ff: int,  # not used in this module
        d_state: int,
        d_conv: int,
        expand: int,
        dropout: float = 0.0,
    ) -> None:
        super().__init__()

        self.input_drop = nn.Dropout(dropout)

        self.input_size = enc_in
        self.output_size = c_out
        self.num_layers = e_layers
        self.noise_level = noise_level

        self.mamba = nn.ModuleList(
            [
                Mamba(
                    d_model=d_model,  # model dimension
                    d_state=d_state,  # SSM state expansion factor
                    d_conv=d_conv,  # local convolution width
                    expand=expand,  # block expansion factor
                )
                for _ in range(self.num_layers)
            ]
        )

        self.in_layer = nn.Linear(self.input_size, d_model)
        self.layer_norm = nn.LayerNorm(d_model)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: [b, t, s, f] = [batch, time, series, features]
        b, t, s, f = x.shape
        x = self.input_drop(x)
        # Fold the series dimension into the batch: [b, t, s, f] -> [b*s, t, f]
        x = x.permute(0, 2, 1, 3).reshape(b * s, t, f)

        # Optionally inject Gaussian input noise during training
        if self.training and self.noise_level > 0:
            noise = torch.randn_like(x)  # matches x's shape, dtype, and device
            x = x + noise * self.noise_level

        x = self.in_layer(x)  # [b*s, t, d_model]
        x = self.layer_norm(x)

        for i in range(self.num_layers):
            x = self.mamba[i](x)  # [b*s, t, d_model]

        # Keep only the last timestep's hidden state for each series
        out = x[:, -1, :].reshape(b, s, -1)  # [b, s, d_model]

        return out  # [b, s, d_model]


class Mambav1(nn.Module):
    def __init__(
        self,
        enc_in: int,
        c_out: int,
        e_layers: int,
        noise_level: float,
        d_model: int,
        d_ff: int,
        d_state: int,
        d_conv: int,
        expand: int,
        dropout: float = 0.0,
    ) -> None:
        super().__init__()

        self.input_drop = nn.Dropout(dropout)

        self.input_size = enc_in
        self.output_size = c_out
        self.num_layers = e_layers
        self.noise_level = noise_level

        self.mamba = MambaComp(
            enc_in=self.input_size,
            c_out=self.output_size,
            e_layers=self.num_layers,
            noise_level=self.noise_level,
            d_model=d_model,
            d_ff=d_ff,
            d_state=d_state,
            d_conv=d_conv,
            expand=expand,
            dropout=dropout,
        )
        self.projection = nn.Linear(d_model, c_out, bias=True)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        x.shape [b, t, s, f]
        """
        b, _, s, _ = x.shape
        mamba_out = self.mamba(x)  # [b, s, d_model]
        out = self.projection(mamba_out)  # [b, s, c_out]
        out = out.reshape(b, s, 1).squeeze(-1)  # [b, s]; assumes c_out == 1

        return out
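
The forward pass consumes a 4-D batch and returns one value per series. Below is a minimal shape-check sketch, not part of the original card: the hyperparameter values follow the Model Config section further down, the batch/time/series sizes are arbitrary placeholders, and the mamba-ssm package (whose fused kernels require a CUDA device) must be installed.

import torch

model = Mambav1(
    enc_in=8,
    c_out=1,
    e_layers=1,
    noise_level=0.0,
    d_model=64,
    d_ff=64,
    d_state=16,
    d_conv=4,
    expand=2,
    dropout=0.1,
).cuda()  # mamba-ssm's selective-scan kernels expect CUDA tensors
model.eval()  # disable dropout for the shape check

x = torch.randn(32, 60, 10, 8).cuda()  # [batch=32, time=60, series=10, features=8]
with torch.no_grad():
    out = model(x)
print(out.shape)  # torch.Size([32, 10]): one prediction per series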

Model Config

e_layers: 1
enc_in: 8
c_out: 1
d_model: 64
d_ff: 64
d_state: 16
d_conv: 4
expand: 2
dropout: 0.1
noise_level: 0.0
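
The card does not state the training objective. Assuming the [b, s] output is regressed against per-series scalar targets, a hypothetical training step (the function name, the MSE loss, and the optimizer choice are all assumptions, not taken from the card) could look like:

import torch
import torch.nn.functional as F

def train_step(model, batch, targets, optimizer):
    """One hypothetical optimization step.

    batch:   [b, t, s, f] input window
    targets: [b, s] per-series regression targets (assumed)
    """
    model.train()
    optimizer.zero_grad()
    preds = model(batch)               # [b, s]
    loss = F.mse_loss(preds, targets)  # MSE is an assumption, not from the card
    loss.backward()
    optimizer.step()
    return loss.item()

# Example wiring (random data, shapes only):
# optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# loss = train_step(model, torch.randn(32, 60, 10, 8).cuda(),
#                   torch.randn(32, 10).cuda(), optimizer)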