Spaces: Running on Zero

Removing nnsight imports

- steering.py  +0 -83

steering.py CHANGED
@@ -1,47 +1,6 @@
 import torch
-from nnsight import LanguageModel
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 from threading import Thread
-from huggingface_hub import hf_hub_download
-
-
-def load_saes(cfg, device):
-    """Load steering vectors from SAEs and prepare steering components."""
-    if not cfg['features'] or len(cfg['features']) == 0:
-        print("No features specified, returning empty steering components.")
-        return []
-
-    steering_components = []
-    cache_dir = "./downloads"
-    features = cfg['features']
-    reduced_strengths = cfg['reduced_strengths']
-
-    for i, feature in enumerate(features):
-        layer_idx, feature_idx = feature[0], feature[1]
-        strength = feature[2] if len(feature) > 2 else 0.0
-
-        # If the strengths in the config file were given in reduced form, scale them by layer index
-        if reduced_strengths:
-            strength *= layer_idx
-
-        # Display strength (avoid division by zero)
-        reduced_str = f"[{strength/layer_idx:.2f}]" if layer_idx > 0 else "[N/A]"
-        print(f"Loading feature {layer_idx} {feature_idx} {strength:.2f} {reduced_str}")
-
-        sae_filename = cfg['sae_filename_prefix'] + f"{layer_idx}" + cfg['sae_filename_suffix']
-        file_path = hf_hub_download(repo_id=cfg['sae_path'], filename=sae_filename, cache_dir=cache_dir)
-        sae = torch.load(file_path, map_location="cpu")
-        vec = sae["decoder.weight"][:, feature_idx].to(device, non_blocking=True)
-
-        steering_components.append({
-            'layer': layer_idx,
-            'feature': feature_idx,
-            'strength': strength,
-            'vector': vec
-        })
-        del sae
-
-    return steering_components
 
 
 def load_saes_from_file(file_path, cfg, device):
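For reference, the removed load_saes pulled everything it needed from a single cfg dict. The keys below mirror the lookups in the deleted code exactly; the repo id, filename pattern, and feature triples are made-up placeholders, not values from this Space:

# Hypothetical cfg for the removed load_saes; keys match the deleted code above,
# values are illustrative placeholders only.
cfg = {
    'sae_path': 'some-org/some-sae-checkpoints',  # placeholder hf_hub_download repo id
    'sae_filename_prefix': 'sae_layer_',          # builds e.g. "sae_layer_12.pt"
    'sae_filename_suffix': '.pt',
    'reduced_strengths': True,   # strengths below are strength/layer_idx, rescaled at load time
    'features': [
        # [layer_idx, feature_idx, strength]; strength defaults to 0.0 when omitted
        [12, 4321, 0.5],
        [20, 987],
    ],
}
# steering_components = load_saes(cfg, device='cuda')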
@@ -112,48 +71,6 @@ def load_saes_from_file(file_path, cfg, device):
     return steering_components
 
 
-def generate_steered_answer(model: LanguageModel,
-                            chat,
-                            steering_components,
-                            max_new_tokens=128,
-                            temperature=0.0,
-                            repetition_penalty=1.0,
-                            clamp_intensity=False):
-    """
-    Generates an answer from the model given a chat history, applying steering components.
-    Expects steering_components to be a list of dicts with keys:
-        'layer': int, layer index to apply steering
-        'strength': float, steering intensity
-        'vector': torch.Tensor, steering vector
-    """
-    input_ids = model.tokenizer.apply_chat_template(chat, tokenize=True, add_generation_prompt=True)
-    with model.generate(max_new_tokens=max_new_tokens, repetition_penalty=repetition_penalty,
-                        do_sample=temperature > 0.0, temperature=temperature,
-                        pad_token_id=model.tokenizer.eos_token_id) as tracer:
-        with tracer.invoke(input_ids):
-            with tracer.all():
-                for sc in steering_components:
-                    layer, strength, vector = sc["layer"], sc["strength"], sc["vector"]
-
-                    # Ensure vector matches model dtype and device
-                    layer_output = model.model.layers[layer].output
-                    vector = vector.to(dtype=layer_output.dtype, device=layer_output.device)
-
-                    length = layer_output.shape[1]
-                    amount = (strength * vector).unsqueeze(0).expand(length, -1).unsqueeze(0).clone()
-                    if clamp_intensity:
-                        projection = (layer_output @ vector).unsqueeze(-1) @ (vector.unsqueeze(0))
-                        amount -= projection
-
-                    layer_output += amount
-        with tracer.invoke():
-            trace = model.generator.output.save()
-
-    answer = model.tokenizer.decode(trace[0][len(input_ids):], skip_special_tokens=True)
-    output = {'input_ids': input_ids, 'trace': trace, 'answer': answer}
-    return output
-
-
 
 def create_steering_hook(layer_idx, steering_components, clamp_intensity=False):
     """
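Two notes on the removed generate_steered_answer. First, the clamp_intensity branch subtracts the projection (h @ v) v before adding strength * v, so rather than stacking steering on top of whatever component the hidden state h already has along v, it pins that component at strength. Second, with the nnsight tracer gone, the same additive intervention can be expressed with plain PyTorch forward hooks, which is presumably what the surviving create_steering_hook provides. The sketch below is a minimal illustration under that assumption, not this Space's actual code: the model id is a placeholder, and model.model.layers assumes the same Llama-style module layout the removed code indexed.

import torch
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "your-org/your-llama-style-model"  # placeholder model id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")

def make_steering_hook(strength, vector):
    """Forward hook that adds strength * vector to a decoder layer's hidden states."""
    def hook(module, args, output):
        # Llama-style decoder layers return a tuple whose first element is the hidden states
        hidden = output[0] if isinstance(output, tuple) else output
        v = vector.to(dtype=hidden.dtype, device=hidden.device)
        hidden = hidden + strength * v  # same update as `layer_output += amount` above
        if isinstance(output, tuple):
            return (hidden,) + output[1:]
        return hidden
    return hook

# steering_components as produced by the surviving load_saes_from_file:
# [{'layer': int, 'strength': float, 'vector': torch.Tensor}, ...]
steering_components = []  # fill in with real components
handles = [model.model.layers[sc['layer']].register_forward_hook(
               make_steering_hook(sc['strength'], sc['vector']))
           for sc in steering_components]

chat = [{"role": "user", "content": "Tell me about bridges."}]
input_ids = tokenizer.apply_chat_template(chat, add_generation_prompt=True,
                                          return_tensors="pt").to(model.device)
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
thread = Thread(target=model.generate,
                kwargs=dict(input_ids=input_ids, max_new_tokens=128, streamer=streamer,
                            pad_token_id=tokenizer.eos_token_id))
thread.start()
for text in streamer:
    print(text, end="", flush=True)
thread.join()

for h in handles:  # always detach hooks when done steering
    h.remove()

The retained TextIteratorStreamer and Thread imports fit this pattern: generation runs on a worker thread while tokens are consumed from the streamer on the main thread.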