Spaces:
Sleeping
Sleeping
File size: 2,631 Bytes
a46c1ac 84d4e4f f361dc7 84d4e4f 084189b 84d4e4f 084189b f361dc7 d2c5505 084189b f361dc7 d2c5505 84d4e4f f361dc7 084189b 2f36884 84d4e4f 2f36884 084189b 84d4e4f 2f36884 084189b 84d4e4f 084189b 84d4e4f 084189b 84d4e4f 084189b 84d4e4f 084189b 84d4e4f 084189b a46c1ac 84d4e4f d2c5505 f361dc7 84d4e4f f361dc7 d2c5505 a46c1ac |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
# syntax=docker/dockerfile:1
# Dockerfile
FROM python:3.11-slim AS builder
# Build-stage environment only. NOTE(review): ENV declared in a build stage
# does NOT carry into the final image — any variable the runtime needs must be
# declared again after the final FROM.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    OLLAMA_MODELS=/home/ollama/.ollama \
    OLLAMA_HOST=0.0.0.0
# Install build dependencies: curl fetches the Ollama install script.
# wget is kept in case the install script falls back to it — TODO confirm
# whether it is actually required.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        curl \
        wget \
    && apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Install Ollama. Run the pipe under bash with pipefail so a failed download
# fails the build; under the default /bin/sh the pipe's exit status would be
# that of `sh`, masking curl errors (hadolint DL4006).
RUN bash -o pipefail -c 'curl -fsSL https://ollama.ai/install.sh | sh'
# Final stage
FROM python:3.11-slim
# Runtime environment. ENV from the builder stage does not carry across
# stages, so the variables Python and the Ollama server need at runtime are
# declared here (previously they existed only in the builder and were absent
# from the shipped image).
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    OLLAMA_MODELS=/home/ollama/.ollama \
    OLLAMA_HOST=0.0.0.0
# Create a non-root runtime user with a stable UID and a writable model dir
RUN useradd -m -u 1000 ollama && \
    mkdir -p /home/ollama/.ollama && \
    chown -R ollama:ollama /home/ollama
WORKDIR /app
# Copy the Ollama binary from the builder stage.
# NOTE(review): only the binary is copied; if the installer also places
# supporting files elsewhere (e.g. under /usr/lib), they are not in this
# image — verify `ollama serve` starts cleanly at runtime.
COPY --from=builder /usr/local/bin/ollama /usr/local/bin/ollama
# Copy the dependency manifest first so the pip layer stays cached until
# requirements.txt itself changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code already owned by the runtime user: --chown avoids the
# doubled layer a follow-up `RUN chown -R` would create, and the previous
# blanket `chmod -R 755` (which marked every file executable) is dropped.
COPY --chown=ollama:ollama . .
# Chown only the /app directory inode itself (WORKDIR created it as root) so
# the non-root user can create files in it later (e.g. the startup script).
RUN chown ollama:ollama /app
# Drop root privileges for everything that follows
USER ollama
# Create the startup script: start the Ollama server in the background, poll
# until it answers (instead of a blind fixed sleep), pull the configured
# models (comma-separated, overridable via $MODELS_TO_PULL) with up to 3
# attempts each, then exec Gunicorn so it becomes the container's PID 1.
# The retry loop only announces a retry and sleeps when attempts remain, and
# exits non-zero via a `pulled` flag if all 3 attempts fail.
RUN echo '#!/bin/bash\n\
set -e\n\
LOG_FILE=/home/ollama/startup.log\n\
echo "Starting Ollama server at $(date)" >> $LOG_FILE\n\
ollama serve >> $LOG_FILE 2>&1 &\n\
for i in {1..30}; do\n\
    if ollama list >> $LOG_FILE 2>&1; then break; fi\n\
    sleep 1\n\
done\n\
MODELS_TO_PULL="${MODELS_TO_PULL:-hf.co/gguf-org/gemma-3-270m-gguf:Q5_K_S}"\n\
echo "Pulling models: $MODELS_TO_PULL" | tee -a $LOG_FILE\n\
IFS=, read -ra MODEL_ARRAY <<< "$MODELS_TO_PULL"\n\
for model in "${MODEL_ARRAY[@]}"; do\n\
    echo "Pulling model $model..." | tee -a $LOG_FILE\n\
    pulled=0\n\
    for attempt in {1..3}; do\n\
        if ollama pull "$model" >> $LOG_FILE 2>&1; then\n\
            echo "Model $model pulled successfully" | tee -a $LOG_FILE\n\
            pulled=1\n\
            break\n\
        fi\n\
        if [ $attempt -lt 3 ]; then\n\
            echo "Attempt $attempt: Failed to pull model $model, retrying in 10 seconds..." | tee -a $LOG_FILE\n\
            sleep 10\n\
        fi\n\
    done\n\
    if [ $pulled -ne 1 ]; then\n\
        echo "Error: Failed to pull model $model after 3 attempts" | tee -a $LOG_FILE\n\
        exit 1\n\
    fi\n\
done\n\
echo "Starting Gunicorn server at $(date)" | tee -a $LOG_FILE\n\
exec python3 -m gunicorn --bind 0.0.0.0:7860 --workers 1 --timeout 120 --log-level info app:app >> $LOG_FILE 2>&1' > /app/start.sh && \
chmod +x /app/start.sh
# Expose the Gunicorn port (documentation only; publish with -p / an
# orchestrator at run time)
EXPOSE 7860
# Health check. The final image does NOT contain curl (it was installed only
# in the builder stage), so the previous curl-based probe could never succeed;
# probe the health endpoint with the Python standard library instead.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/health', timeout=5)" || exit 1
# Run the startup script (exec form, so it receives signals directly)
CMD ["/app/start.sh"]