Shim committed on
Commit
79cc1d2
·
1 Parent(s): 37545aa

Improve model generation and remove static responses - use better Hebrew model and full persona prompts

Files changed (1)
  1. app.py +144 -101
app.py CHANGED
@@ -42,24 +42,35 @@ class MirautrApp:
42
  is_hf_spaces = os.getenv("SPACE_ID") is not None
43
 
44
  if is_hf_spaces:
45
- logger.info("Running in Hugging Face Spaces - using lightweight Hebrew-capable model")
46
- # Use a small multilingual model that supports Hebrew and fits in HF Spaces
47
- model_name = "google/flan-t5-small" # 77M parameters, supports Hebrew
48
- logger.info(f"Loading lightweight model: {model_name}")
49
-
50
  else:
51
  # For local development, try Hebrew-specific model first
52
  try:
53
  model_name = "yam-peleg/Hebrew-Mistral-7B"
54
  logger.info(f"Loading Hebrew model: {model_name}")
55
  except:
56
- # Fallback to small model for local testing too
57
- model_name = "google/flan-t5-small"
58
- logger.info(f"Falling back to small model: {model_name}")
59
 
60
  # Load tokenizer
61
  self.tokenizer = AutoTokenizer.from_pretrained(model_name)
62
 
63
  # Determine the best settings for the environment
64
  if torch.cuda.is_available() and not is_hf_spaces:
65
  torch_dtype = torch.float16
@@ -70,14 +81,7 @@ class MirautrApp:
70
  device_map = None
71
 
72
  # Load model with appropriate settings
73
- if "t5" in model_name.lower():
74
- # Use Seq2Seq model for T5
75
- self.model = AutoModelForSeq2SeqLM.from_pretrained(
76
- model_name,
77
- torch_dtype=torch_dtype,
78
- low_cpu_mem_usage=True
79
- )
80
- elif "mistral" in model_name.lower():
81
  # Use CausalLM for Mistral with additional settings
82
  self.model = AutoModelForCausalLM.from_pretrained(
83
  model_name,
@@ -91,35 +95,31 @@ class MirautrApp:
91
  self.model = AutoModelForCausalLM.from_pretrained(
92
  model_name,
93
  torch_dtype=torch_dtype,
94
- low_cpu_mem_usage=True
 
95
  )
96
 
97
  # Create text generation pipeline with appropriate settings
98
  generation_kwargs = {
99
- "max_new_tokens": 100,
100
- "temperature": 0.8,
101
  "do_sample": True,
102
- "pad_token_id": self.tokenizer.eos_token_id,
 
 
 
103
  "return_full_text": False
104
  }
105
 
106
- # For T5 models, use text2text-generation
107
- if "t5" in model_name.lower():
108
- self.generator = pipeline(
109
- "text2text-generation",
110
- model=self.model,
111
- tokenizer=self.tokenizer,
112
- **generation_kwargs
113
- )
114
- else:
115
- self.generator = pipeline(
116
- "text-generation",
117
- model=self.model,
118
- tokenizer=self.tokenizer,
119
- **generation_kwargs
120
- )
121
 
122
- logger.info("Model loaded successfully")
123
 
124
  except Exception as e:
125
  logger.error(f"Error loading model: {e}")
@@ -159,90 +159,115 @@ class MirautrApp:
159
  # Prepare conversation context
160
  context = self.conversation_manager.get_conversation_context(conversation_state)
161
 
162
- # Try to generate with model first
163
  response = None
164
  if self.generator:
165
  try:
166
- # Check if using T5 model (text2text-generation)
167
- if hasattr(self.generator, 'task') and self.generator.task == 'text2text-generation':
168
- # For T5 models, create a more structured prompt
169
- part_description = DEFAULT_PARTS.get(conversation_state.selected_part, {}).get("description", conversation_state.selected_part)
170
- persona_name = conversation_state.persona_name or DEFAULT_PARTS.get(conversation_state.selected_part, {}).get("default_persona_name", "ื—ืœืง ืคื ื™ืžื™")
171
-
172
- prompt = f"ืืชื” {persona_name}, {part_description}. ืขื ื” ื‘ืขื‘ืจื™ืช ืขืœ ื”ื”ื•ื“ืขื” ื”ื‘ืื” ื‘ื”ืชืื ืœืื•ืคื™ ืฉืœืš: {user_message}"
173
-
174
- outputs = self.generator(prompt, max_length=150, num_return_sequences=1)
175
  response = outputs[0]["generated_text"].strip()
 
176
 
177
- # Clean up the response if it repeats the prompt
178
- if prompt in response:
179
- response = response.replace(prompt, "").strip()
180
 
181
  else:
182
- # For causal LM models
183
- full_prompt = f"{system_prompt}\n\nื”ืงืฉืจ: {context}\n\nื”ืžืฉืชืžืฉ ืืžืจ: {user_message}\n\nืชื’ื•ื‘ื”:"
184
- outputs = self.generator(full_prompt)
185
- response = outputs[0]["generated_text"]
186
- # Extract only the new generated part
187
- response = response[len(full_prompt):].strip()
188
-
189
- # Basic validation and cleanup
190
- if not response or len(response.strip()) < 5:
191
  response = None
192
 
193
  except Exception as gen_error:
194
- logger.warning(f"Model generation failed: {gen_error}, falling back to contextual response")
195
  response = None
196
 
197
- # If model generation failed or no model available, use fallback
198
  if not response:
199
- # Fallback response for demo mode
200
  part_info = DEFAULT_PARTS.get(conversation_state.selected_part, {})
201
  persona_name = conversation_state.persona_name or part_info.get("default_persona_name", "ื—ืœืง ืคื ื™ืžื™")
 
202
 
203
- # Generate contextual response based on the part and user message
204
- responses_by_part = {
205
- "ื”ืงื•ืœ ื”ื‘ื™ืงื•ืจืชื™": [
206
- f"ืื ื™ {persona_name}, ื”ืงื•ืœ ื”ื‘ื™ืงื•ืจืชื™ ืฉืœืš. ืฉืžืขืชื™ ืžื” ืฉืืžืจืช ืขืœ '{user_message}'. ืื ื™ ื—ื•ืฉื‘ ืฉื›ื“ืื™ ืœื‘ื—ื•ืŸ ืืช ื–ื” ื™ื•ืชืจ ืœืขื•ืžืง - ืžื” ื‘ืืžืช ืžื ื™ืข ืื•ืชืš ื›ืืŸ?",
207
- f"ื›{persona_name}, ืื ื™ ืžืขื™ืจ ืฉื™ื ืœื‘ - '{user_message}' - ื”ืื ื–ื” ื‘ืืžืช ืžื” ืฉืืชื” ืฆืจื™ืš ืขื›ืฉื™ื•? ื‘ื•ืื ื• ื ื—ืฉื•ื‘ ืขืœ ื–ื” ื™ื—ื“.",
208
- f"ืื ื™ {persona_name} ื•ืื ื™ ื›ืืŸ ื›ื“ื™ ืœืขื–ื•ืจ ืœืš ืœื—ืฉื•ื‘ ื‘ื™ืงื•ืจืชื™ื•ืช. ืžื” ืฉืืžืจืช ืขืœ '{user_message}' ืžืขื•ืจืจ ื‘ื™ ืฉืืœื•ืช - ื”ืื ื‘ื—ื ืช ืืช ื›ืœ ื”ื–ื•ื•ื™ื•ืช?"
209
- ],
210
- "ื”ื™ืœื“/ื” ื”ืคื ื™ืžื™ืช": [
211
- f"ืื ื™ {persona_name}, ื”ื™ืœื“/ื” ื”ืคื ื™ืžื™ืช ืฉืœืš. ืžื” ืฉืืžืจืช - '{user_message}' - ื’ื•ืจื ืœื™ ืœื”ืจื’ื™ืฉ... ื–ื” ืงืฆืช ืžืคื—ื™ื“ ืื‘ืœ ื’ื ืžืขื ื™ื™ืŸ. ืืชื” ื‘ืืžืช ืฉื•ืžืข ืื•ืชื™?",
212
- f"ื”ื™ื™, ืื ื™ {persona_name}! ืžื” ืฉืืžืจืช ืขื›ืฉื™ื• ืขืœ '{user_message}' ื ื•ื’ืข ืœื™ ื‘ืœื‘. ืœืคืขืžื™ื ืื ื™ ืžืจื’ื™ืฉ/ื” ื›ืœ ื›ืš ืงื˜ืŸ/ื” ื‘ืคื ื™ื, ืื‘ืœ ืืชื” ื ื•ืชืŸ ืœื™ ืžืงื•ื ื›ืืŸ.",
213
- f"ืื ื™ {persona_name}, ื”ื—ืœืง ื”ืฆืขื™ืจ ื•ื”ืจื’ื™ืฉ ืฉืœืš. '{user_message}' - ื–ื” ืžื–ื›ื™ืจ ืœื™ ืื™ืš ื–ื” ื”ืจื’ื™ืฉ ืคืขื ืœื”ื™ื•ืช ืงื˜ืŸ/ื”. ืืชื” ื–ื•ื›ืจ ืื™ืš ื–ื” ื”ื™ื”?"
214
- ],
215
- "ื”ืžืจืฆื”": [
216
- f"ืื ื™ {persona_name}, ื”ืžืจืฆื” ืฉืœืš. ืžื” ืฉืืžืจืช - '{user_message}' - ืื ื™ ืจื•ืฆื” ืฉื›ื•ืœื ื™ื‘ื™ื ื• ื•ื™ืจื’ื™ืฉื• ื‘ืกื“ืจ ืขื ื–ื”. ืื™ืš ืื ื™ ื™ื›ื•ืœ ืœืขื–ื•ืจ ืœืš ืœื”ื™ื•ืช ืžืจื•ืฆื” ืžื”ืžืฆื‘?",
217
- f"ื›{persona_name}, ืื ื™ ืชืžื™ื“ ืžื ืกื” ืœื•ื•ื“ื ืฉื›ื•ืœื ืžืจื•ืฆื™ื. ืžื” ืฉืืžืจืช ืขืœ '{user_message}' - ืื™ืš ื–ื” ื™ืฉืคื™ืข ืขืœ ื”ืื—ืจื™ื? ืื ื™ ื“ื•ืื’ ืฉื›ื•ืœื ื™ื”ื™ื• ื‘ืกื“ืจ.",
218
- f"ืื ื™ {persona_name} ื•ืื ื™ ื›ืืŸ ื›ื“ื™ ืœืขื–ื•ืจ ืœืš ืœืžืฆื•ื ื“ืจืš ืฉื›ื•ืœื ื™ื”ื™ื• ืžืจื•ืฆื™ื. '{user_message}' - ื‘ื•ืื ื• ื ืžืฆื ืคืชืจื•ืŸ ืฉืžืชืื™ื ืœื›ื•ืœื."
219
- ],
220
- "ื”ืžื’ืŸ": [
221
- f"ืื ื™ {persona_name}, ื”ืžื’ืŸ ืฉืœืš. ืžื” ืฉืืžืจืช - '{user_message}' - ืื ื™ ื›ืืŸ ื›ื“ื™ ืœืฉืžื•ืจ ืขืœื™ืš. ื”ืื ื–ื” ื‘ื˜ื•ื—? ื”ืื ืื ื™ ืฆืจื™ืš ืœื“ืื•ื’ ืœืžืฉื”ื•?",
222
- f"ื›{persona_name}, ืชืคืงื™ื“ื™ ืœื”ื’ืŸ ืขืœื™ืš. ืžื” ืฉืืžืจืช ืขืœ '{user_message}' ืžืขื•ืจืจ ื‘ื™ ืืช ื”ืื™ื ืกื˜ื™ื ืงื˜ ื”ืžื’ื•ื ืŸ. ืื™ืš ืื ื™ ื™ื›ื•ืœ ืœื•ื•ื“ื ืฉืืชื” ืžื•ื’ืŸ?",
223
- f"ืื ื™ {persona_name}, ื”ืฉื•ืžืจ ื”ื ืืžืŸ ืฉืœืš. '{user_message}' - ืื ื™ ื‘ื•ื—ืŸ ืื ื–ื” ื‘ื˜ื•ื— ืขื‘ื•ืจืš. ืœืคืขืžื™ื ืื ื™ ืฆืจื™ืš ืœื”ื™ื•ืช ืงืฉื•ื— ื›ื“ื™ ืœืฉืžื•ืจ ืขืœื™ืš."
224
- ],
225
- "ื”ื ืžื ืข/ืช": [
226
- f"ืื ื™ {persona_name}, ื”ื—ืœืง ืฉืžืขื“ื™ืฃ ืœื”ื™ืžื ืข. ืžื” ืฉืืžืจืช ืขืœ '{user_message}' - ื–ื” ืžืขื•ืจืจ ื‘ื™ ื—ืจื“ื”. ืื•ืœื™ ื›ื“ืื™ ืคืฉื•ื˜... ืœื ืœื”ืชืขืกืง ืขื ื–ื” ืขื›ืฉื™ื•?",
227
- f"ื›{persona_name}, ืื ื™ ืžืจื’ื™ืฉ/ื” ืงืฆืช ืœื ื‘ื ื•ื— ืขื '{user_message}'. ืื•ืœื™ ื ื“ื—ื” ืืช ื–ื” ืงืฆืช? ืœืคืขืžื™ื ื–ื” ื‘ืกื“ืจ ืœื ืœื”ืชืžื•ื“ื“ ืขื ื”ื›ืœ ืžื™ื“.",
228
- f"ืื ื™ {persona_name} ื•ืื ื™ ืžืขื“ื™ืฃ/ื” ืœื”ื™ืฉืืจ ื‘ืฆื“. ืžื” ืฉืืžืจืช - '{user_message}' - ื–ื” ื ืจืื” ืžืกื•ื‘ืš. ืืชื” ื‘ื˜ื•ื— ืฉืืชื” ืจื•ืฆื” ืœื”ื™ื›ื ืก ืœื–ื” ืขื›ืฉื™ื•?"
229
- ]
230
- }
231
-
232
- # Get relevant responses for the part
233
- part_responses = responses_by_part.get(conversation_state.selected_part, [
234
- f"ืื ื™ {persona_name}, {conversation_state.selected_part} ืฉืœืš. ืฉืžืขืชื™ ืžื” ืฉืืžืจืช ืขืœ '{user_message}'. ื–ื”ื• ืžืฆื‘ ื”ื“ื’ืžื” - ื‘ืžืฆื‘ ืžืœื ื™ื”ื™ื” ื›ืืŸ ืžื•ื“ืœ ืฉืคื” ืขื‘ืจื™ืช."
235
- ])
236
-
237
- # Select response based on conversation length for variety
238
- response_index = len(conversation_state.conversation_history) % len(part_responses)
239
- response = part_responses[response_index]
240
 
241
  return response
242
 
243
  except Exception as e:
244
  logger.error(f"Error generating response: {e}")
245
- return "ืกืœื™ื—ื”, ื ืชืงืœืชื™ ื‘ื‘ืขื™ื” ื˜ื›ื ื™ืช. ืื ื ื ืกื” ืฉื•ื‘."
246
 
247
  def create_main_interface(self):
248
  """Create the main Gradio interface"""
@@ -555,10 +580,28 @@ def main():
555
  else:
556
  # Local development settings
557
  logger.info("Configuring for local development")
558
  launch_config.update({
559
  "server_name": "127.0.0.1",
560
- "server_port": int(os.getenv("GRADIO_SERVER_PORT", "7860")),
561
- "share": True, # Create public link for local testing
562
  "quiet": False
563
  })
564
 
 
42
  is_hf_spaces = os.getenv("SPACE_ID") is not None
43
 
44
  if is_hf_spaces:
45
+ logger.info("Running in Hugging Face Spaces - using multilingual model with Hebrew support")
46
+ # Use a better multilingual model that supports Hebrew well
47
+ model_name = "microsoft/DialoGPT-medium" # Better conversational model
48
+ try:
49
+ # Try Hebrew-capable multilingual model first
50
+ model_name = "bigscience/bloomz-560m" # Better Hebrew support
51
+ logger.info(f"Loading multilingual model with Hebrew support: {model_name}")
52
+ except:
53
+ # Fallback to DialoGPT if bloomz fails
54
+ model_name = "microsoft/DialoGPT-medium"
55
+ logger.info(f"Fallback to conversational model: {model_name}")
56
+
57
  else:
58
  # For local development, try Hebrew-specific model first
59
  try:
60
  model_name = "yam-peleg/Hebrew-Mistral-7B"
61
  logger.info(f"Loading Hebrew model: {model_name}")
62
  except:
63
+ # Fallback to better multilingual model
64
+ model_name = "bigscience/bloomz-560m"
65
+ logger.info(f"Falling back to multilingual model: {model_name}")
66
 
67
  # Load tokenizer
68
  self.tokenizer = AutoTokenizer.from_pretrained(model_name)
69
 
70
+ # Add padding token if missing
71
+ if self.tokenizer.pad_token is None:
72
+ self.tokenizer.pad_token = self.tokenizer.eos_token
73
+
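
Note on the fallback logic above: both try/except blocks only wrap plain string assignments and logger calls, so in practice they can never raise and the except branches are unreachable. A minimal sketch of a selection loop that actually probes each candidate checkpoint before settling on it; the helper name and candidate list are illustrative, not part of this commit:

import logging
from transformers import AutoTokenizer

logger = logging.getLogger(__name__)

def pick_first_loadable(candidates):
    """Return the first model name whose tokenizer can actually be loaded."""
    for name in candidates:
        try:
            AutoTokenizer.from_pretrained(name)  # cheap availability probe
            return name
        except Exception as exc:  # missing repo, gated model, network error, ...
            logger.warning(f"Could not load {name}: {exc}")
    raise RuntimeError("No candidate model could be loaded")

# hypothetical usage mirroring the Spaces branch
model_name = pick_first_loadable(["bigscience/bloomz-560m", "microsoft/DialoGPT-medium"])
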
74
  # Determine the best settings for the environment
75
  if torch.cuda.is_available() and not is_hf_spaces:
76
  torch_dtype = torch.float16
 
81
  device_map = None
82
 
83
  # Load model with appropriate settings
84
+ if "mistral" in model_name.lower():
85
  # Use CausalLM for Mistral with additional settings
86
  self.model = AutoModelForCausalLM.from_pretrained(
87
  model_name,
 
95
  self.model = AutoModelForCausalLM.from_pretrained(
96
  model_name,
97
  torch_dtype=torch_dtype,
98
+ low_cpu_mem_usage=True,
99
+ trust_remote_code=True
100
  )
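
For reference, a self-contained sketch of the loading pattern used in this hunk: dtype chosen by environment, a pad token added when missing, and low_cpu_mem_usage/trust_remote_code passed through. The model name and the SPACE_ID check mirror the diff; everything else is an assumption for illustration:

import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "bigscience/bloomz-560m"            # as selected above
on_spaces = os.getenv("SPACE_ID") is not None    # same environment check the app uses

# fp16 only when a GPU is available and we are not on the constrained Spaces tier
torch_dtype = torch.float16 if torch.cuda.is_available() and not on_spaces else torch.float32

tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token    # many causal LMs ship without a pad token

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,     # avoid materialising a second full copy of the weights in RAM
    trust_remote_code=True,     # only needed for repos that ship custom modelling code
)
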
101
 
102
  # Create text generation pipeline with appropriate settings
103
  generation_kwargs = {
104
+ "max_new_tokens": 120,
105
+ "temperature": 0.7,
106
  "do_sample": True,
107
+ "top_p": 0.9,
108
+ "top_k": 50,
109
+ "pad_token_id": self.tokenizer.pad_token_id,
110
+ "eos_token_id": self.tokenizer.eos_token_id,
111
  "return_full_text": False
112
  }
113
 
114
+ # Always use causal LM pipeline for consistent behavior
115
+ self.generator = pipeline(
116
+ "text-generation",
117
+ model=self.model,
118
+ tokenizer=self.tokenizer,
119
+ **generation_kwargs
120
+ )
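
A short sketch of exercising the pipeline built above, assuming the model and tokenizer from the previous sketch; the Hebrew prompt is only an example:

from transformers import pipeline

generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=120,
    temperature=0.7,
    do_sample=True,
    top_p=0.9,
    top_k=50,
    pad_token_id=tokenizer.pad_token_id,
    eos_token_id=tokenizer.eos_token_id,
    return_full_text=False,   # return only the newly generated text, not the prompt
)

outputs = generator('המשתמש אמר: "שלום". החלק הפנימי מגיב:')
print(outputs[0]["generated_text"])
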
121
 
122
+ logger.info(f"Model loaded successfully: {model_name}")
123
 
124
  except Exception as e:
125
  logger.error(f"Error loading model: {e}")
 
159
  # Prepare conversation context
160
  context = self.conversation_manager.get_conversation_context(conversation_state)
161
 
162
+ # Generate response with model
163
  response = None
164
  if self.generator:
165
  try:
166
+ # Get part information for better context
167
+ part_info = DEFAULT_PARTS.get(conversation_state.selected_part, {})
168
+ part_description = part_info.get("description", conversation_state.selected_part)
169
+ persona_name = conversation_state.persona_name or part_info.get("default_persona_name", "ื—ืœืง ืคื ื™ืžื™")
170
+
171
+ # Create a well-structured prompt using the full system prompt
172
+ full_system_prompt = system_prompt.strip()
173
+
174
+ prompt_template = f"""{full_system_prompt}
175
+
176
+ ื”ืงืฉืจ ื ื•ืกืฃ: {conversation_state.user_context if conversation_state.user_context else 'ืœืœื ื”ืงืฉืจ ืžื™ื•ื—ื“'}
177
+
178
+ ืฉื™ื—ื” ืขื“ ื›ื”:
179
+ {context}
180
+
181
+ ื”ืžืฉืชืžืฉ ืืžืจ: "{user_message}"
182
+
183
+ {persona_name} ืžื’ื™ื‘:"""
184
+
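
The multi-line f-string above combines the system prompt, optional user context, conversation history and the persona cue in one place. The same structure as a small helper keeps the prompt format testable in isolation; the function name is hypothetical, not part of the commit:

def build_persona_prompt(system_prompt: str, user_context: str,
                         history: str, user_message: str, persona_name: str) -> str:
    """Assemble the generation prompt in the same order the app uses."""
    context_line = user_context if user_context else "ללא הקשר מיוחד"
    return (
        f"{system_prompt.strip()}\n\n"
        f"הקשר נוסף: {context_line}\n\n"
        f"שיחה עד כה:\n{history}\n\n"
        f'המשתמש אמר: "{user_message}"\n\n'
        f"{persona_name} מגיב:"
    )
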
185
+ logger.info(f"Generating response for part: {conversation_state.selected_part}")
186
+
187
+ # Generate with the model
188
+ outputs = self.generator(
189
+ prompt_template,
190
+ max_new_tokens=80,
191
+ temperature=0.7,
192
+ do_sample=True,
193
+ top_p=0.9,
194
+ pad_token_id=self.tokenizer.pad_token_id,
195
+ eos_token_id=self.tokenizer.eos_token_id
196
+ )
197
+
198
+ if outputs and len(outputs) > 0:
199
  response = outputs[0]["generated_text"].strip()
200
+ logger.info(f"Raw model output length: {len(response)}")
201
 
202
+ # Clean up response - remove prompt and extract only the new part
203
+ if response:
204
+ # Try to extract only the response part
205
+ response_lines = response.split('\n')
206
+ for i, line in enumerate(response_lines):
207
+ if f"{persona_name} ืžื’ื™ื‘:" in line and i + 1 < len(response_lines):
208
+ response = '\n'.join(response_lines[i+1:]).strip()
209
+ break
210
+
211
+ # If that didn't work, try other cleanup methods
212
+ if not response or len(response) < 10:
213
+ # Look for the response after the last colon
214
+ if ':' in outputs[0]["generated_text"]:
215
+ response = outputs[0]["generated_text"].split(':')[-1].strip()
216
 
217
+ # Validate and clean the response
218
+ if response:
219
+ # Remove any remaining prompt artifacts
220
+ response = response.replace(prompt_template, "").strip()
221
+ response = response.replace(f"{persona_name} ืžื’ื™ื‘:", "").strip()
222
+ response = response.replace("ื”ืžืฉืชืžืฉ ืืžืจ:", "").strip()
223
+
224
+ # Remove incomplete sentences or artifacts
225
+ if response.startswith('"') and not response.endswith('"'):
226
+ response = response[1:]
227
+
228
+ # Ensure minimum quality
229
+ if len(response.strip()) >= 10 and not response.lower().startswith('the user'):
230
+ logger.info(f"Generated response: {response[:50]}...")
231
+ else:
232
+ logger.warning(f"Response too short or invalid: '{response}'")
233
+ response = None
234
+ else:
235
+ logger.warning("Empty response after cleanup")
236
+ response = None
237
  else:
238
+ logger.warning("No outputs from model")
239
  response = None
240
 
241
  except Exception as gen_error:
242
+ logger.error(f"Model generation failed: {gen_error}")
243
  response = None
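
The cleanup above recovers the reply by string-matching on '{persona_name} מגיב:' and on colons, which is brittle when the model paraphrases the cue; since the pipeline was built with return_full_text=False, the prompt should not be echoed in the first place. When calling model.generate directly, slicing off the prompt by token count is a more reliable way to keep only the new text. A sketch under that assumption, reusing the tokenizer and model loaded earlier (the helper name is illustrative):

import torch

def generate_reply(prompt: str, max_new_tokens: int = 80) -> str:
    """Generate a continuation and return only the tokens produced after the prompt."""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    # drop the prompt tokens instead of string-matching on the prompt text
    new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
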
244
 
245
+ # If we still don't have a response, generate a contextual one using the persona
246
  if not response:
247
+ logger.info("Using contextual persona-based response generation")
248
  part_info = DEFAULT_PARTS.get(conversation_state.selected_part, {})
249
  persona_name = conversation_state.persona_name or part_info.get("default_persona_name", "ื—ืœืง ืคื ื™ืžื™")
250
+ part_description = part_info.get("description", "")
251
 
252
+ # Generate a more dynamic response based on the actual persona and context
253
+ if conversation_state.selected_part == "ื”ืงื•ืœ ื”ื‘ื™ืงื•ืจืชื™":
254
+ response = f"ืื ื™ {persona_name}. ืฉืžืขืชื™ ืžื” ืฉืืžืจืช - '{user_message}'. ืื ื™ ืžืจื’ื™ืฉ ืฉืฆืจื™ืš ืœื‘ื—ื•ืŸ ืืช ื–ื” ื™ื•ืชืจ ืœืขื•ืžืง. ืžื” ื‘ืืžืช ืžื ื™ืข ืื•ืชืš ื›ืืŸ? ื”ืื ื—ืฉื‘ืช ืขืœ ื›ืœ ื”ื”ืฉืœื›ื•ืช?"
255
+ elif conversation_state.selected_part == "ื”ื™ืœื“/ื” ื”ืคื ื™ืžื™ืช":
256
+ response = f"ืื ื™ {persona_name}, ื”ื—ืœืง ื”ืฆืขื™ืจ ืฉืœืš. ืžื” ืฉืืžืจืช ืขืœ '{user_message}' ื ื•ื’ืข ืœื™. ื–ื” ื’ื•ืจื ืœื™ ืœื”ืจื’ื™ืฉ... ืงืฆืช ืžืคื•ื—ื“ ืื‘ืœ ื’ื ืกืงืจืŸ. ืืชื” ื‘ืืžืช ืฉื•ืžืข ืื•ืชื™ ืขื›ืฉื™ื•?"
257
+ elif conversation_state.selected_part == "ื”ืžืจืฆื”":
258
+ response = f"ืื ื™ {persona_name}. ืžื” ืฉืืžืจืช - '{user_message}' - ืื ื™ ืจื•ืฆื” ืœื•ื•ื“ื ืฉื›ื•ืœื ื™ื”ื™ื• ื‘ืกื“ืจ ืขื ื–ื”. ืื™ืš ืืชื” ื—ื•ืฉื‘ ืฉื–ื” ื™ืฉืคื™ืข ืขืœ ื”ืื—ืจื™ื? ื‘ื•ืื ื• ื ืžืฆื ืคืชืจื•ืŸ ืฉืžืชืื™ื ืœื›ื•ืœื."
259
+ elif conversation_state.selected_part == "ื”ืžื’ืŸ":
260
+ response = f"ืื ื™ {persona_name}, ื”ืฉื•ืžืจ ืฉืœืš. '{user_message}' - ืื ื™ ืžืขืจื™ืš ืืช ื”ืžืฆื‘. ื”ืื ื–ื” ื‘ื˜ื•ื—? ื”ืื ืื ื™ ืฆืจื™ืš ืœื“ืื•ื’ ืœืžืฉื”ื•? ืชืคืงื™ื“ื™ ืœืฉืžื•ืจ ืขืœื™ืš."
261
+ elif conversation_state.selected_part == "ื”ื ืžื ืข/ืช":
262
+ response = f"ืื ื™ {persona_name}. ืžื” ืฉืืžืจืช ืขืœ '{user_message}' ืžืขื•ืจืจ ื‘ื™ ืงืฆืช ื—ืจื“ื”. ืื•ืœื™... ืœื ื—ื™ื™ื‘ื™ื ืœื”ืชืžื•ื“ื“ ืขื ื–ื” ืขื›ืฉื™ื•? ืœืคืขืžื™ื ื–ื” ื‘ืกื“ืจ ืœืงื—ืช ื”ืคืกืงื”."
263
+ else:
264
+ response = f"ืื ื™ {persona_name}, {conversation_state.selected_part} ืฉืœืš. ืฉืžืขืชื™ ืžื” ืฉืืžืจืช ืขืœ '{user_message}'. ื‘ื•ืื ื• ื ืฉื•ื—ื— ืขืœ ื–ื” ื™ื—ื“."
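
The if/elif chain above hard-codes one canned reply per part name. A mapping from part name to a template keeps the fallback data-driven and easier to extend; the dictionary below is an illustrative subset with shortened texts, not the full wording used in the commit:

FALLBACK_TEMPLATES = {
    "הקול הביקורתי": "אני {persona}. שמעתי מה שאמרת - '{msg}'. מה באמת מניע אותך כאן?",
    "הילד/ה הפנימית": "אני {persona}, החלק הצעיר שלך. מה שאמרת על '{msg}' נוגע לי.",
    "המגן": "אני {persona}, השומר שלך. '{msg}' - האם זה בטוח עבורך?",
}
DEFAULT_TEMPLATE = "אני {persona}, {part} שלך. שמעתי מה שאמרת על '{msg}'. בואו נשוחח על זה יחד."

def fallback_response(part: str, persona: str, msg: str) -> str:
    template = FALLBACK_TEMPLATES.get(part, DEFAULT_TEMPLATE)
    return template.format(persona=persona, part=part, msg=msg)
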
265
 
266
  return response
267
 
268
  except Exception as e:
269
  logger.error(f"Error generating response: {e}")
270
+ return "ืกืœื™ื—ื”, ื ืชืงืœืชื™ ื‘ื‘ืขื™ื” ื˜ื›ื ื™ืช. ื‘ื•ืื ื• ื ื ืกื” ืฉื•ื‘."
271
 
272
  def create_main_interface(self):
273
  """Create the main Gradio interface"""
 
580
  else:
581
  # Local development settings
582
  logger.info("Configuring for local development")
583
+
584
+ # Try to find an available port
585
+ default_port = int(os.getenv("GRADIO_SERVER_PORT", "7861"))
586
+ available_port = default_port
587
+
588
+ # Check if port is available, if not find next available
589
+ import socket
590
+ for port_try in range(default_port, default_port + 10):
591
+ try:
592
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
593
+ s.bind(('127.0.0.1', port_try))
594
+ available_port = port_try
595
+ break
596
+ except OSError:
597
+ continue
598
+
599
+ logger.info(f"Using port {available_port} for local development")
600
+
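
The inline scan above works, but a port found free this way can still be claimed by another process before Gradio binds it (a benign race for local development). The same logic as a reusable helper, plus the port-0 variant where the OS assigns any free port; both function names are illustrative:

import socket

def find_free_port(start: int = 7861, attempts: int = 10) -> int:
    """Return the first port in [start, start + attempts) that accepts a local bind."""
    for port in range(start, start + attempts):
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                s.bind(("127.0.0.1", port))
                return port
        except OSError:
            continue
    raise RuntimeError(f"No free port found in range {start}-{start + attempts - 1}")

def os_assigned_port() -> int:
    """Let the OS hand out any currently free port (bind to port 0)."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(("127.0.0.1", 0))
        return s.getsockname()[1]
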
601
  launch_config.update({
602
  "server_name": "127.0.0.1",
603
+ "server_port": available_port,
604
+ "share": False, # Disable share for local development - can be enabled manually
605
  "quiet": False
606
  })
607