OliverPerrin committed
Commit 6b4c072 · 1 Parent(s): d9092be

Build: Pin ruff to v0.4.4 in pre-commit to match CI

.pre-commit-config.yaml CHANGED
@@ -1,6 +1,6 @@
 repos:
 - repo: https://github.com/astral-sh/ruff-pre-commit
-  rev: v0.14.7
+  rev: v0.4.4
   hooks:
   - id: ruff
     args: [ --fix ]
src/models/encoder.py CHANGED
@@ -160,9 +160,9 @@ class TransformerEncoder(nn.Module):
         Build a 3D attention mask (batch, seq, seq) from input_ids and pad_token_id.
         True indicates valid positions; False indicates masked (pad).
         """
-        assert self.pad_token_id is not None, (
-            "pad_token_id must be set to build padding mask from ids."
-        )
+        assert (
+            self.pad_token_id is not None
+        ), "pad_token_id must be set to build padding mask from ids."
         # mask shape: (batch, seq) where True = token kept (non-pad)
         pad_mask = input_ids != self.pad_token_id
         # Convert to (batch, seq_q, seq_k) by outer product broadcasting
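
For context, the "(batch, seq_q, seq_k)" comment above describes turning a per-token pad mask into a pairwise attention mask. The snippet below is a minimal, illustrative sketch of that broadcasting step (made-up tensor values, not the repository's actual method):

import torch

pad_token_id = 0
input_ids = torch.tensor([[5, 7, 2, 0],
                          [3, 0, 0, 0]])

# (batch, seq): True = token kept (non-pad), as in the comment above.
pad_mask = input_ids != pad_token_id

# Outer-product broadcasting to (batch, seq_q, seq_k): position pair (i, j) stays
# valid only if both the query token i and the key token j are non-pad.
attn_mask = pad_mask.unsqueeze(2) & pad_mask.unsqueeze(1)
print(attn_mask.shape)  # torch.Size([2, 4, 4])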
src/models/heads.py CHANGED
@@ -97,12 +97,12 @@ class LMHead(nn.Module):
 
         if tie_embedding is not None:
             # Validate sizes
-            assert tie_embedding.num_embeddings == vocab_size, (
-                "vocab size mismatch for weight tying"
-            )
-            assert tie_embedding.embedding_dim == d_model, (
-                "embedding dim must match d_model for weight tying"
-            )
+            assert (
+                tie_embedding.num_embeddings == vocab_size
+            ), "vocab size mismatch for weight tying"
+            assert (
+                tie_embedding.embedding_dim == d_model
+            ), "embedding dim must match d_model for weight tying"
             # Tie weights: point the projection weight to the embedding weight Tensor
             # Remove the existing projection parameter in favor of the embedding weight
             # This keeps the same Parameter object, so updates affect both modules.
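
The tying described in the comments above amounts to sharing one Parameter between the input embedding and the output projection. A minimal sketch of that pattern (illustrative sizes and names, not the repository's LMHead):

import torch.nn as nn

vocab_size, d_model = 100, 32
embedding = nn.Embedding(vocab_size, d_model)             # weight: (vocab_size, d_model)
projection = nn.Linear(d_model, vocab_size, bias=False)   # weight: (vocab_size, d_model)

# Point the projection weight at the embedding's weight Tensor. Both modules now
# hold the same Parameter object, so an update to one is an update to the other.
projection.weight = embedding.weight
assert projection.weight is embedding.weight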
tests/test_models/test_decoder.py CHANGED
@@ -64,9 +64,9 @@ def test_decoder_layer_causal_mask_blocks_future():
     B, H, Tq, Tk = self_attn.shape
     for i in range(Tq):
         for j in range(i + 1, Tk):
-            assert torch.allclose(self_attn[:, :, i, j], torch.zeros(B, H)), (
-                f"Found nonzero attention to future position {j} from query {i}"
-            )
+            assert torch.allclose(
+                self_attn[:, :, i, j], torch.zeros(B, H)
+            ), f"Found nonzero attention to future position {j} from query {i}"
 
 
 def test_decoder_stack_and_greedy_decode_shapes():
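
The loop in the hunk above asserts that no query position attends to a later key position. Assuming self_attn has shape (batch, heads, seq_q, seq_k) as unpacked in the test, an equivalent vectorized check (a sketch, not part of this commit) would be:

import torch

def assert_causal(self_attn: torch.Tensor) -> None:
    """Assert that attention strictly above the diagonal (future keys) is zero."""
    B, H, Tq, Tk = self_attn.shape
    # True strictly above the diagonal, i.e. key index j > query index i.
    future = torch.triu(torch.ones(Tq, Tk, dtype=torch.bool), diagonal=1)
    masked = self_attn[:, :, future]  # shape: (B, H, num_future_pairs)
    assert torch.allclose(
        masked, torch.zeros_like(masked)
    ), "Found nonzero attention to a future position"

This replaces the two Python loops with a single boolean-indexing operation.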