# Dockerfile
FROM python:3.11-slim AS builder

# Install build dependencies
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    curl \
    && apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Install Ollama
RUN curl -fsSL https://ollama.ai/install.sh | sh

# Final stage
FROM python:3.11-slim

# Runtime environment variables. ENV set in the builder stage does not carry
# over to this stage, so the Python and Ollama settings are declared here.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    OLLAMA_MODELS=/home/ollama/.ollama \
    OLLAMA_HOST=0.0.0.0

# curl is required at runtime by the HEALTHCHECK below, and the fresh base
# image does not include it, so it must be installed again in this stage.
RUN apt-get update && \
    apt-get install -y --no-install-recommends curl && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Create a non-root user
RUN useradd -m -u 1000 ollama && \
    mkdir -p /home/ollama/.ollama && \
    chown -R ollama:ollama /home/ollama
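# OLLAMA_MODELS above points at /home/ollama/.ollama (created here), so models
# pulled at startup land in a directory the non-root user can write to.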

WORKDIR /app

# Copy Ollama binaries from builder stage
COPY --from=builder /usr/local/bin/ollama /usr/local/bin/ollama
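
# Note (assumption, not verified here): newer Ollama installers also place
# runtime libraries under /usr/local/lib/ollama; if the binary alone fails to
# start, copy that directory from the builder stage as well.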

# Copy requirements and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
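
# The startup script below launches Gunicorn via `python3 -m gunicorn`, so
# requirements.txt is assumed to include gunicorn alongside the app's own deps.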

# Copy application code
COPY . .

# Set proper ownership and permissions
RUN chown -R ollama:ollama /app && \
    chmod -R 755 /app

# Switch to ollama user
USER ollama

# Create the startup script with configurable model pulling and retry logging.
# The heredoc below requires BuildKit (the default builder since Docker 23.0);
# the quoted 'EOF' delays variable expansion to runtime rather than build time.
COPY --chmod=755 <<'EOF' /app/start.sh
#!/bin/bash
set -e
LOG_FILE=/home/ollama/startup.log

echo "Starting Ollama server at $(date)" >> "$LOG_FILE"
ollama serve >> "$LOG_FILE" 2>&1 &

# Poll until the Ollama server answers instead of sleeping a fixed 15 seconds.
for i in $(seq 1 30); do
    if ollama list > /dev/null 2>&1; then
        break
    fi
    sleep 1
done

MODELS_TO_PULL="${MODELS_TO_PULL:-hf.co/gguf-org/gemma-3-270m-gguf:Q5_K_S}"
echo "Pulling models: $MODELS_TO_PULL" | tee -a "$LOG_FILE"
IFS=',' read -ra MODEL_ARRAY <<< "$MODELS_TO_PULL"
for model in "${MODEL_ARRAY[@]}"; do
    echo "Pulling model $model..." | tee -a "$LOG_FILE"
    for attempt in 1 2 3; do
        if ollama pull "$model" >> "$LOG_FILE" 2>&1; then
            echo "Model $model pulled successfully" | tee -a "$LOG_FILE"
            break
        fi
        # Fail hard after the third attempt instead of sleeping once more.
        if [ "$attempt" -eq 3 ]; then
            echo "Error: failed to pull model $model after 3 attempts" | tee -a "$LOG_FILE"
            exit 1
        fi
        echo "Attempt $attempt: failed to pull model $model, retrying in 10 seconds..." | tee -a "$LOG_FILE"
        sleep 10
    done
done

echo "Starting Gunicorn server at $(date)" | tee -a "$LOG_FILE"
# Leave Gunicorn's output on stdout/stderr so it is visible via `docker logs`.
exec python3 -m gunicorn --bind 0.0.0.0:7860 --workers 1 --timeout 120 --log-level info app:app
EOF

# Expose port
EXPOSE 7860
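# 7860 matches the Gunicorn bind address above (it is also the conventional
# default app port on Hugging Face Spaces).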

# Health check against the app's /health endpoint (uses curl installed above)
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1
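
# This assumes app.py exposes a /health route returning HTTP 200; adjust the
# path if the application uses a different readiness endpoint.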

# Run the startup script
CMD ["/app/start.sh"]
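
# Example usage (image tag and model names are illustrative, not from this repo):
#   docker build -t ollama-app .
#   docker run -p 7860:7860 -e MODELS_TO_PULL="gemma2:2b,llama3.2:1b" ollama-app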