Commit 6b4c072
Parent: d9092be

Build: Pin ruff to v0.4.4 in pre-commit to match CI

Files changed:
- .pre-commit-config.yaml +1 -1
- src/models/encoder.py +3 -3
- src/models/heads.py +6 -6
- tests/test_models/test_decoder.py +3 -3
.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.
+    rev: v0.4.4
     hooks:
       - id: ruff
         args: [ --fix ]
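Note: the bump only helps if the hook and CI agree on the same ruff release. Below is a minimal consistency-check sketch; the requirements-dev.txt path and its "ruff==" pin are hypothetical illustrations, not taken from this repo.

# Hypothetical check that the pre-commit ruff pin matches the CI pin.
# The requirements-dev.txt location and its "ruff==" line are assumptions.
import re

import yaml  # PyYAML


def precommit_ruff_rev(path=".pre-commit-config.yaml"):
    with open(path) as f:
        config = yaml.safe_load(f)
    for repo in config["repos"]:
        if "ruff-pre-commit" in repo["repo"]:
            return repo["rev"].lstrip("v")  # mirror tags look like "v0.4.4"
    raise LookupError("ruff-pre-commit hook not found")


def ci_ruff_version(path="requirements-dev.txt"):
    with open(path) as f:
        for line in f:
            match = re.match(r"ruff==([\d.]+)", line.strip())
            if match:
                return match.group(1)
    raise LookupError("ruff pin not found in CI requirements")


if __name__ == "__main__":
    assert precommit_ruff_rev() == ci_ruff_version(), "ruff versions out of sync"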
src/models/encoder.py
@@ -160,9 +160,9 @@ class TransformerEncoder(nn.Module):
         Build a 3D attention mask (batch, seq, seq) from input_ids and pad_token_id.
         True indicates valid positions; False indicates masked (pad).
         """
-        assert
-
-        )
+        assert (
+            self.pad_token_id is not None
+        ), "pad_token_id must be set to build padding mask from ids."
         # mask shape: (batch, seq) where True = token kept (non-pad)
         pad_mask = input_ids != self.pad_token_id
         # Convert to (batch, seq_q, seq_k) by outer product broadcasting
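The comments in this hunk describe turning the 2D pad mask into a 3D attention mask by outer-product broadcasting. A minimal sketch of that step with a toy pad_token_id and batch (illustrative only, not the repo's actual code):

import torch

pad_token_id = 0                              # assumed value for illustration
input_ids = torch.tensor([[5, 7, 9, 0, 0]])   # (batch=1, seq=5), two trailing pads

pad_mask = input_ids != pad_token_id          # (batch, seq): True = kept token
attn_mask = pad_mask.unsqueeze(2) & pad_mask.unsqueeze(1)  # (batch, seq_q, seq_k)

print(attn_mask.shape)   # torch.Size([1, 5, 5])
print(attn_mask[0, 0])   # tensor([ True,  True,  True, False, False])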
src/models/heads.py
@@ -97,12 +97,12 @@ class LMHead(nn.Module):
 
         if tie_embedding is not None:
             # Validate sizes
-            assert
-
-            )
-            assert
-
-            )
+            assert (
+                tie_embedding.num_embeddings == vocab_size
+            ), "vocab size mismatch for weight tying"
+            assert (
+                tie_embedding.embedding_dim == d_model
+            ), "embedding dim must match d_model for weight tying"
             # Tie weights: point the projection weight to the embedding weight Tensor
             # Remove the existing projection parameter in favor of the embedding weight
             # This keeps the same Parameter object, so updates affect both modules.
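The surrounding comments describe weight tying by reusing the embedding's Parameter as the projection weight. A standalone sketch of that pattern with toy sizes (illustrative only; the module names here are not the repo's):

import torch.nn as nn

vocab_size, d_model = 100, 16                 # toy sizes for illustration
embedding = nn.Embedding(vocab_size, d_model)
projection = nn.Linear(d_model, vocab_size, bias=False)

# nn.Linear.weight is (out_features, in_features) = (vocab_size, d_model),
# the same shape as nn.Embedding.weight, so the tie is shape-compatible.
assert embedding.weight.shape == projection.weight.shape

# Reuse the embedding's Parameter object: one tensor shared by both modules,
# so an optimizer step on either updates both, as the diff's comments say.
projection.weight = embedding.weight
assert projection.weight is embedding.weight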
tests/test_models/test_decoder.py
@@ -64,9 +64,9 @@ def test_decoder_layer_causal_mask_blocks_future():
     B, H, Tq, Tk = self_attn.shape
     for i in range(Tq):
         for j in range(i + 1, Tk):
-            assert torch.allclose(
-
-            )
+            assert torch.allclose(
+                self_attn[:, :, i, j], torch.zeros(B, H)
+            ), f"Found nonzero attention to future position {j} from query {i}"
 
 
 def test_decoder_stack_and_greedy_decode_shapes():
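The strengthened assert checks, entry by entry, that no query attends to a future key. A toy illustration of why those entries come out exactly zero when a causal mask is applied before the softmax (an assumed masking scheme, not necessarily this decoder's exact implementation):

import torch

B, H, T = 1, 2, 4
scores = torch.randn(B, H, T, T)                          # raw attention logits
causal = torch.tril(torch.ones(T, T, dtype=torch.bool))   # True at/below diagonal
weights = torch.softmax(scores.masked_fill(~causal, float("-inf")), dim=-1)

# exp(-inf) == 0, so every weight at a future (j > i) position is exactly zero,
# which is the property the test's elementwise allclose verifies.
assert torch.all(weights.masked_select(~causal) == 0)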