# Chaptive/src/pages/_about_page.py
# Repository-viewer header residue: Jing997 · "change app name" · 497bf08
import streamlit as st
from utils.helpers import get_image_base64, get_remote_image_url, resolve_image_source
# Whitelisted channels as (logo filename, channel URL, display name).
_CHANNELS = (
    ("MIT OpenCourseWare.png", "https://www.youtube.com/@mitocw", "MIT OpenCourseWare"),
    ("Stanford Online.png", "https://www.youtube.com/@stanfordonline", "Stanford Online"),
    ("Harvard.png", "https://www.youtube.com/@harvard", "Harvard"),
    ("freeCodeCamp.png", "https://www.youtube.com/@freecodecamp", "freeCodeCamp.org"),
    ("TEDx.png", "https://www.youtube.com/@TEDx", "TEDx Talks"),
    ("SciShow.png", "https://www.youtube.com/@SciShow", "SciShow"),
    ("Udacity.png", "https://www.youtube.com/@Udacity", "Udacity"),
    ("ProgrammingWithMosh.png", "https://www.youtube.com/@programmingwithmosh", "Programming with Mosh"),
    ("Computerphile.png", "https://www.youtube.com/@Computerphile", "Computerphile"),
    ("edX.png", "https://www.youtube.com/@edXOnline", "edX"),
    ("TED.png", "https://www.youtube.com/@TED", "TED"),
    ("YaleCourses.png", "https://www.youtube.com/@YaleCourses", "YaleCourses"),
    ("Veritasium.png", "https://www.youtube.com/@veritasium", "Veritasium"),
    ("3blue1brown.png", "https://www.youtube.com/@3blue1brown", "3blue1brown"),
    ("CrashCourse.png", "https://www.youtube.com/@crashcourse", "CrashCourse"),
    ("KhanAcademy.png", "https://www.youtube.com/@khanacademy", "Khan Academy"),
    ("minutephysics.png", "https://www.youtube.com/@minutephysics", "minutephysics"),
    ("numberphile.png", "https://www.youtube.com/@numberphile", "numberphile"),
    ("TEDEd.png", "https://www.youtube.com/@TEDEd", "TED-Ed"),
    ("coursera.png", "https://www.youtube.com/@coursera", "Coursera"),
)

# Number of channel logos laid out per row of the whitelist grid.
_CHANNELS_PER_ROW = 4

# Benefit cards as (emoji, title, description). Keeping each card in one
# record means the three fields can never drift out of step.
_FEATURE_CARDS = (
    ("🚀", "One-click Ingestion", "Instant access to hours of lectures—no manual steps."),
    ("🔎", "Grounded QA", "Reliable, context-rich answers—less searching, more learning."),
    ("📑", "Bookmarks & Summaries", "Digest lectures into key sections for easy review."),
    ("📝", "Quiz Builder", "Self-test and reinforce knowledge instantly."),
    ("💡", "Semantic Search", "Find the exact explanation you need, fast."),
    ("✅", "Strict Channel Policy", "Only high-quality, trusted educational content."),
)

# Number of benefit cards per row.
_FEATURE_CARDS_PER_ROW = 3


def _divider() -> None:
    """Render the horizontal rule used between page sections."""
    st.markdown("<hr>", unsafe_allow_html=True)


def _render_channel_grid() -> None:
    """Render the whitelisted channels as rows of linked logos with captions."""
    for row_start in range(0, len(_CHANNELS), _CHANNELS_PER_ROW):
        # A full set of columns is created for every row so that a short
        # final row keeps the same cell width as the rows above it.
        columns = st.columns(_CHANNELS_PER_ROW)
        row = _CHANNELS[row_start:row_start + _CHANNELS_PER_ROW]
        for column, (img_filename, url, alt) in zip(columns, row):
            with column:
                # Use the inlined image when the helper returns data;
                # otherwise fall back to the remote URL for the same file.
                img_base64 = get_image_base64(img_filename)
                if img_base64:
                    img_src = f"data:image/png;base64,{img_base64}"
                else:
                    img_src = get_remote_image_url(img_filename)
                logo_html = (
                    "\n"
                    f"<a href='{url}' target='_blank'>\n"
                    f"<img src='{img_src}' style='display:block;margin:auto;width:48px;height:48px;object-fit:contain;' alt='{alt}'/>\n"
                    "</a>\n"
                )
                st.markdown(logo_html, unsafe_allow_html=True)
                st.markdown(
                    f"<div style='text-align:center;'><a href='{url}' target='_blank'>{alt}</a></div>",
                    unsafe_allow_html=True,
                )


def _render_feature_cards() -> None:
    """Render the benefit cards in rows of ``_FEATURE_CARDS_PER_ROW`` columns."""
    for row_start in range(0, len(_FEATURE_CARDS), _FEATURE_CARDS_PER_ROW):
        columns = st.columns(_FEATURE_CARDS_PER_ROW)
        row = _FEATURE_CARDS[row_start:row_start + _FEATURE_CARDS_PER_ROW]
        for column, (emoji, title, description) in zip(columns, row):
            with column:
                st.markdown(f"{emoji} **{title}**")
                st.write(description)


def about_page() -> None:
    """Render the "About Chaptive AI" page.

    Emits, in order: the title and intro with an optional cover image, the
    project overview, the whitelisted-channel grid and policy note, the key
    feature bullets, the benefit cards, the technology table, the
    architecture section with an optional diagram, and the getting-started
    steps. Sections are separated by horizontal rules.
    """
    st.title("About Chaptive AI")
    st.markdown(
        "Chaptive is a conversational tutor that turns long-form YouTube lectures into searchable,"
        " grounded study sessions using Gemini, FastAPI, and a fully serverless AWS backbone."
    )
    # The cover image is shown only when the helper resolves a source for it.
    cover_src = resolve_image_source("cover_page.jpg")
    if cover_src:
        st.image(cover_src, caption="Serverless AI data pipeline overview", width=512)
    _divider()

    st.header("Project Overview")
    st.write(
        "Chaptive indexes pre-approved educational channels, downloads transcripts, chunks and embeds them,"
        " then exposes bookmarks, semantic search, QA, quizzes, and summaries via a single FastAPI Lambda."
        " Downstream clients (Hugging Face Space, Streamlit, CLI) interact through API Gateway using API keys"
        " and strict channel whitelists."
    )
    _divider()

    with st.expander("Whitelisted YouTube Channels ✅", expanded=True):
        _render_channel_grid()
    # NOTE(review): the separator and policy note below are assumed to sit
    # outside the expander, like every other section separator — confirm.
    _divider()
    st.info(
        "Only whitelisted channels are supported due to copyright restrictions and to ensure all ingested content is suitable for educational and tutoring purposes. This policy protects creators' intellectual property and guarantees high-quality, reliable study material for learners."
    )
    _divider()

    st.header("Key Features")
    left_col, right_col = st.columns(2)
    with left_col:
        st.markdown(
            "- **One-click ingestion** queues a background job that fetches transcripts and stores artifacts in S3.\n"
            "- **Grounded QA** cites timestamped transcript snippets for every answer.\n"
            "- **Bookmarks & summaries** give learners instant structure for long lectures."
        )
    with right_col:
        st.markdown(
            "- **Quiz builder** mixes MCQ/open prompts sourced from Gemini.\n"
            "- **Semantic search** surfaces the best matching transcript spans instantly.\n"
            "- **Strict channel policy** enforces ingestion only from approved education partners."
        )
    _divider()

    st.header("How Chaptive AI Transforms Your Study Experience")
    st.markdown("Chaptive AI is designed to help learners master complex material quickly and at scale:")
    _render_feature_cards()
    st.markdown(
        "<br><b>Together, these features empower users to learn faster, retain more, and scale their study efforts across multiple subjects and lectures—making Chaptive AI a powerful tool for modern, rapid, and effective education.</b>",
        unsafe_allow_html=True,
    )
    _divider()

    st.header("Tools & Technologies")
    st.markdown(
        "| Layer | Stack |\n| --- | --- |\n"
        "| Backend | FastAPI + Pydantic running on AWS Lambda via Mangum |\n"
        "| Storage | AWS S3 (artifacts + Lambda ZIP), DynamoDB (`ChaptiveJobs`), Secrets Manager |\n"
        "| AI | Google Gemini (text-embedding-004, 1.5 Flash) |\n"
        "| Infra | Terraform (S3, DynamoDB, IAM, Lambda, API Gateway, CloudWatch) |\n"
        "| Frontend | Streamlit & Hugging Face Space clients |"
    )
    _divider()

    st.header("Architecture & Flow")
    # The diagram is optional; the step list below is assumed to render
    # whether or not the image resolves — NOTE(review): confirm.
    arch_src = resolve_image_source("user_flow_system_architecture.jpg")
    if arch_src:
        st.image(arch_src, caption="User Flow & System Architecture", width=800)
    st.write(
        "1. User submits a YouTube URL from an approved channel.\n"
        "2. API Gateway forwards the request to the Lambda-hosted FastAPI app.\n"
        "3. Background worker fetches transcripts, chunks + embeds with Gemini, and stores artifacts in S3 with lifecycle policies.\n"
        "4. Job metadata lives in DynamoDB and is polled via `/videos/process/{job_id}`.\n"
        "5. Subsequent queries (bookmarks, QA, summary, quiz, search) reuse cached artifacts for sub-second responses.\n"
        "6. Logs & metrics stream into CloudWatch while Terraform keeps every resource in sync."
    )
    _divider()

    st.header("Getting Started")
    st.markdown(
        "1. Use the **Home** page to upload a YouTube lecture, browse bookmarks, and chat with Gemini.\n"
        "2. Visit **Contact** page for support, licensing, and GitHub links."
    )
# Render the page when this module is executed as a script rather than imported.
if __name__ == "__main__":
    about_page()