Spaces:

Jing997
/

Chaptive

Running

App Files Files Community

Chaptive / src /pages /_about_page.py

Jing997

change app name

497bf08 10 days ago

raw

history blame contribute delete

8.64 kB

	import streamlit as st

	from utils.helpers import get_image_base64, get_remote_image_url, resolve_image_source


	# (logo filename, channel URL, display name) for every whitelisted channel,
	# listed in the order the logos are laid out on the page.
	_WHITELISTED_CHANNELS = (
	    ("MIT OpenCourseWare.png", "https://www.youtube.com/@mitocw", "MIT OpenCourseWare"),
	    ("Stanford Online.png", "https://www.youtube.com/@stanfordonline", "Stanford Online"),
	    ("Harvard.png", "https://www.youtube.com/@harvard", "Harvard"),
	    ("freeCodeCamp.png", "https://www.youtube.com/@freecodecamp", "freeCodeCamp.org"),
	    ("TEDx.png", "https://www.youtube.com/@TEDx", "TEDx Talks"),
	    ("SciShow.png", "https://www.youtube.com/@SciShow", "SciShow"),
	    ("Udacity.png", "https://www.youtube.com/@Udacity", "Udacity"),
	    ("ProgrammingWithMosh.png", "https://www.youtube.com/@programmingwithmosh", "Programming with Mosh"),
	    ("Computerphile.png", "https://www.youtube.com/@Computerphile", "Computerphile"),
	    ("edX.png", "https://www.youtube.com/@edXOnline", "edX"),
	    ("TED.png", "https://www.youtube.com/@TED", "TED"),
	    ("YaleCourses.png", "https://www.youtube.com/@YaleCourses", "YaleCourses"),
	    ("Veritasium.png", "https://www.youtube.com/@veritasium", "Veritasium"),
	    ("3blue1brown.png", "https://www.youtube.com/@3blue1brown", "3blue1brown"),
	    ("CrashCourse.png", "https://www.youtube.com/@crashcourse", "CrashCourse"),
	    ("KhanAcademy.png", "https://www.youtube.com/@khanacademy", "Khan Academy"),
	    ("minutephysics.png", "https://www.youtube.com/@minutephysics", "minutephysics"),
	    ("numberphile.png", "https://www.youtube.com/@numberphile", "numberphile"),
	    ("TEDEd.png", "https://www.youtube.com/@TEDEd", "TED-Ed"),
	    ("coursera.png", "https://www.youtube.com/@coursera", "Coursera"),
	)

	# (emoji, title, description) for each highlight card, in display order.
	# Kept as one tuple per card so the three fields cannot drift out of sync.
	_FEATURE_CARDS = (
	    ("🚀", "One-click Ingestion", "Instant access to hours of lectures—no manual steps."),
	    ("🔎", "Grounded QA", "Reliable, context-rich answers—less searching, more learning."),
	    ("📑", "Bookmarks & Summaries", "Digest lectures into key sections for easy review."),
	    ("📝", "Quiz Builder", "Self-test and reinforce knowledge instantly."),
	    ("💡", "Semantic Search", "Find the exact explanation you need, fast."),
	    ("✅", "Strict Channel Policy", "Only high-quality, trusted educational content."),
	)

	# Markdown source of the "Tools & Technologies" table. Cell separators must be
	# plain "|": a "\|" is an invalid Python escape sequence and, because the
	# backslash is kept in the string, Markdown reads it as an escaped literal
	# pipe instead of a column delimiter.
	_TOOLS_TABLE_MD = (
	    "| Layer | Stack |\n| --- | --- |\n"
	    "| Backend | FastAPI + Pydantic running on AWS Lambda via Mangum |\n"
	    "| Storage | AWS S3 (artifacts + Lambda ZIP), DynamoDB (`ChaptiveJobs`), Secrets Manager |\n"
	    "| AI | Google Gemini (text-embedding-004, 1.5 Flash) |\n"
	    "| Infra | Terraform (S3, DynamoDB, IAM, Lambda, API Gateway, CloudWatch) |\n"
	    "| Frontend | Streamlit & Hugging Face Space clients |"
	)


	def _section_break() -> None:
	    """Emit the raw-HTML horizontal rule that separates page sections."""
	    st.markdown("<hr>", unsafe_allow_html=True)


	def _render_channel_grid(num_cols: int = 4) -> None:
	    """Render each whitelisted channel as a linked logo with a linked name below it.

	    Channels are laid out ``num_cols`` per row. Each logo is embedded as a
	    base64 data URI when ``get_image_base64`` returns a truthy value; otherwise
	    the URL from ``get_remote_image_url`` is used as the image source.
	    """
	    for row_start in range(0, len(_WHITELISTED_CHANNELS), num_cols):
	        row = _WHITELISTED_CHANNELS[row_start:row_start + num_cols]
	        # Always request num_cols columns, even for a short final row, so the
	        # cells keep the same width; zip simply leaves the surplus columns empty.
	        for column, (img_filename, url, alt) in zip(st.columns(num_cols), row):
	            with column:
	                img_base64 = get_image_base64(img_filename)
	                img_src = (
	                    f"data:image/png;base64,{img_base64}"
	                    if img_base64
	                    else get_remote_image_url(img_filename)
	                )
	                # Built from unindented pieces so the HTML never depends on
	                # how leading whitespace in the source file is treated.
	                st.markdown(
	                    f"<a href='{url}' target='_blank'>\n"
	                    f"<img src='{img_src}' style='display:block;margin:auto;"
	                    f"width:48px;height:48px;object-fit:contain;' alt='{alt}'/>\n"
	                    "</a>",
	                    unsafe_allow_html=True,
	                )
	                st.markdown(
	                    f"<div style='text-align:center;'><a href='{url}' target='_blank'>{alt}</a></div>",
	                    unsafe_allow_html=True,
	                )


	def _render_feature_cards(per_row: int = 3) -> None:
	    """Render the feature highlight cards, ``per_row`` cards to a row."""
	    for row_start in range(0, len(_FEATURE_CARDS), per_row):
	        row = _FEATURE_CARDS[row_start:row_start + per_row]
	        for column, (emoji, title, description) in zip(st.columns(per_row), row):
	            with column:
	                st.markdown(f"{emoji} {title}")
	                st.write(description)


	def about_page() -> None:
	    """Render the "About Chaptive AI" page.

	    Emits, top to bottom: the title and intro, an optional cover image, the
	    project overview, the whitelisted-channel grid, the whitelist policy
	    note, key features, the feature highlight cards, the tools table, the
	    architecture image with the numbered request flow, and getting-started
	    steps. Both images are shown only when ``resolve_image_source`` returns a
	    truthy value for them.

	    Returns:
	        None. All output is produced through Streamlit calls.
	    """
	    st.title("About Chaptive AI")
	    st.markdown(
	        "Chaptive is a conversational tutor that turns long-form YouTube lectures into searchable,"
	        " grounded study sessions using Gemini, FastAPI, and a fully serverless AWS backbone."
	    )

	    cover_src = resolve_image_source("cover_page.jpg")
	    if cover_src:
	        st.image(cover_src, caption="Serverless AI data pipeline overview", width=512)

	    _section_break()
	    st.header("Project Overview")
	    # Adjacent literals concatenate on their own inside the parentheses; no
	    # backslash continuations are needed.
	    st.write(
	        "Chaptive indexes pre-approved educational channels, downloads transcripts, chunks and embeds them,"
	        " then exposes bookmarks, semantic search, QA, quizzes, and summaries via a single FastAPI Lambda."
	        " Downstream clients (Hugging Face Space, Streamlit, CLI) interact through API Gateway using API keys"
	        " and strict channel whitelists."
	    )

	    _section_break()
	    with st.expander("Whitelisted YouTube Channels ✅", expanded=True):
	        _render_channel_grid()

	    # NOTE(review): the rule and policy note are assumed to sit outside the
	    # expander, like every other section separator — confirm against the
	    # intended layout.
	    _section_break()
	    st.info(
	        "Only whitelisted channels are supported due to copyright restrictions and to ensure all ingested"
	        " content is suitable for educational and tutoring purposes. This policy protects creators'"
	        " intellectual property and guarantees high-quality, reliable study material for learners."
	    )

	    _section_break()
	    st.header("Key Features")
	    left, right = st.columns(2)
	    with left:
	        st.markdown(
	            "- One-click ingestion queues a background job that fetches transcripts and stores artifacts in S3.\n"
	            "- Grounded QA cites timestamped transcript snippets for every answer.\n"
	            "- Bookmarks & summaries give learners instant structure for long lectures."
	        )
	    with right:
	        st.markdown(
	            "- Quiz builder mixes MCQ/open prompts sourced from Gemini.\n"
	            "- Semantic search surfaces the best matching transcript spans instantly.\n"
	            "- Strict channel policy enforces ingestion only from approved education partners."
	        )

	    _section_break()
	    st.header("How Chaptive AI Transforms Your Study Experience")
	    st.markdown("Chaptive AI is designed to help learners master complex material quickly and at scale:")
	    _render_feature_cards()
	    st.markdown(
	        "<br><b>Together, these features empower users to learn faster, retain more, and scale their"
	        " study efforts across multiple subjects and lectures—making Chaptive AI a powerful tool for"
	        " modern, rapid, and effective education.</b>",
	        unsafe_allow_html=True,
	    )

	    _section_break()
	    st.header("Tools & Technologies")
	    st.markdown(_TOOLS_TABLE_MD)

	    _section_break()
	    st.header("Architecture & Flow")
	    arch_src = resolve_image_source("user_flow_system_architecture.jpg")
	    if arch_src:
	        st.image(arch_src, caption="User Flow & System Architecture", width=800)
	    # Deliberately not an f-string: "{job_id}" is shown literally as the path
	    # placeholder.
	    st.write(
	        "1. User submits a YouTube URL from an approved channel.\n"
	        "2. API Gateway forwards the request to the Lambda-hosted FastAPI app.\n"
	        "3. Background worker fetches transcripts, chunks + embeds with Gemini, and stores artifacts in S3 with lifecycle policies.\n"
	        "4. Job metadata lives in DynamoDB and is polled via `/videos/process/{job_id}`.\n"
	        "5. Subsequent queries (bookmarks, QA, summary, quiz, search) reuse cached artifacts for sub-second responses.\n"
	        "6. Logs & metrics stream into CloudWatch while Terraform keeps every resource in sync."
	    )

	    _section_break()
	    st.header("Getting Started")
	    st.markdown(
	        "1. Use the Home page to upload a YouTube lecture, browse bookmarks, and chat with Gemini.\n"
	        "2. Visit Contact page for support, licensing, and GitHub links."
	    )


	# Render the page when this module is executed as the entry script rather
	# than imported by another page.
	if __name__ == "__main__":
	    about_page()