abrakjamson committed
Commit · 9acb8e6
1 Parent(s): 129904f
advanced settings, bug fixes
app.py
CHANGED
@@ -27,7 +27,7 @@ model = model.to("cuda:0" if torch.cuda.is_available() else "cpu")
 model = ControlModel(model, list(range(-5, -18, -1)))
 
 # Generation settings
-
+default_generation_settings = {
     "pad_token_id": tokenizer.eos_token_id, # Silence warning
     "do_sample": False, # Deterministic output
     "max_new_tokens": 256,
@@ -48,14 +48,19 @@ def toggle_slider(checked):
     return gr.update(visible=checked)
 
 # Function to generate the model's response
-def generate_response(system_prompt, user_message, *args, history):
+def generate_response(system_prompt, user_message, *args, history=None, max_new_tokens=256, repetition_penalty=1.1):
+    checkboxes = []
+    sliders = []
+
     # Separate checkboxes and sliders based on type
-
-
-
+    for item in args:
+        if type(item) == bool:
+            checkboxes.append(item)
+        elif isinstance(item, (int, float)):
+            sliders.append(item)
 
     if len(checkboxes) != len(control_vector_files) or len(sliders) != len(control_vector_files):
-        return history
+        return history if history else [], history if history else []
 
     # Reset any previous control vectors
     model.reset()
@@ -66,7 +71,6 @@ def generate_response(system_prompt, user_message, *args, history):
         cv_file = control_vector_files[i]
         weight = sliders[i]
         try:
-            print(f"Setting {cv_file} to {weight}")
             control_vector = ControlVector.import_gguf(cv_file)
             model.set_control(control_vector, weight)
         except Exception as e:
@@ -91,8 +95,15 @@ def generate_response(system_prompt, user_message, *args, history):
     # Tokenize the input
     input_ids = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
 
+    generation_settings = {
+        "pad_token_id": tokenizer.eos_token_id,
+        "do_sample": default_generation_settings["do_sample"],
+        "max_new_tokens": int(max_new_tokens),
+        "repetition_penalty": repetition_penalty,
+    }
+
     # Generate the response
-    output_ids = model.generate(**input_ids, **
+    output_ids = model.generate(**input_ids, **generation_settings)
     response = tokenizer.decode(output_ids.squeeze(), skip_special_tokens=True)
 
     # Clean up the response by removing any trailing tags
@@ -101,7 +112,7 @@ def generate_response(system_prompt, user_message, *args, history):
 
     # Update conversation history
     history.append((user_message, response))
-    return history
+    return history, history
 
 # Function to reset the conversation history
 def reset_chat():
@@ -120,7 +131,7 @@ with gr.Blocks() as demo:
             system_prompt = gr.Textbox(
                 label="System Prompt",
                 lines=2,
-                placeholder="
+                placeholder="Respond to the user concisely"
             )
 
             gr.Markdown("### 📊 Control Vectors")
@@ -152,6 +163,22 @@ with gr.Blocks() as demo:
                    outputs=slider
                )
 
+            # Advanced Settings Section (collapsed by default)
+            with gr.Accordion("🔧 Advanced Settings", open=False):
+                with gr.Row():
+                    max_new_tokens = gr.Number(
+                        label="Max New Tokens",
+                        value=default_generation_settings["max_new_tokens"],
+                        precision=0,
+                        step=10,
+                    )
+                    repetition_penalty = gr.Number(
+                        label="Repetition Penalty",
+                        value=default_generation_settings["repetition_penalty"],
+                        precision=2,
+                        step=0.1,
+                    )
+
         # Right Column: Chat Interface
         with gr.Column(scale=2):
             gr.Markdown("### 🗨️ Conversation")
@@ -172,7 +199,7 @@ with gr.Blocks() as demo:
             new_chat_button = gr.Button("🆕 New Chat")
 
     # State to keep track of conversation history
-    state = gr.State(
+    state = gr.State()
 
     # Define button actions
    submit_button.click(
@@ -180,7 +207,7 @@ with gr.Blocks() as demo:
        inputs=[system_prompt, user_input] + control_checks + control_sliders + [state],
        outputs=[chatbot, state]
    )
-
+
    new_chat_button.click(
        reset_chat,
        inputs=[],
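
For reference, a minimal self-contained sketch (not part of the commit) of the pattern the updated generate_response relies on: Gradio's click() passes the values of the checkbox and slider components positionally, so the handler receives booleans and numbers interleaved in *args and has to separate them by type. Component names below are illustrative, not taken from app.py.

# Illustrative sketch: separating checkbox and slider values passed via *args.
import gradio as gr

def handler(system_prompt, user_message, *args):
    # The bool check must come first: isinstance(True, int) is also True in Python.
    flags = [a for a in args if isinstance(a, bool)]
    weights = [a for a in args if isinstance(a, (int, float)) and not isinstance(a, bool)]
    return f"{sum(flags)} control vectors enabled, weights={weights}"

with gr.Blocks() as demo:
    sys_box = gr.Textbox(label="System Prompt")
    msg_box = gr.Textbox(label="Message")
    checks = [gr.Checkbox(label=f"vector {i}") for i in range(3)]
    sliders = [gr.Slider(-2.0, 2.0, value=0.0, label=f"weight {i}") for i in range(3)]
    out = gr.Textbox(label="Result")
    gr.Button("Run").click(handler, inputs=[sys_box, msg_box] + checks + sliders, outputs=out)

demo.launch()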