#!/usr/bin/env python3
"""Gradio dashboard comparing daily PyPI download counts of several packages.

Download statistics are pulled from the pypistats.org JSON API and drawn as a
log-scale line chart, optionally smoothed with a rolling mean.
"""
import datetime

import httpx
import polars as pl
import plotly.express as px
import gradio as gr

PACKAGES = ["trackio", "wandb", "neptune", "comet-ml"]


def fetch_pypi_stats(package: str) -> pl.DataFrame:
    """Fetch daily PyPI download stats from pypistats.org for a given package.

    Args:
        package: Name of the package on PyPI.

    Returns:
        A frame with columns ``date`` (``pl.Date``), ``downloads`` and
        ``package``, sorted by date. If the API returns no rows, the empty
        frame built from the response is returned unchanged.

    Raises:
        httpx.HTTPStatusError: If the API answers with an error status.
    """
    url = f"https://pypistats.org/api/packages/{package}/overall"
    r = httpx.get(url, timeout=30.0)
    r.raise_for_status()
    data = r.json().get("data", [])
    df = pl.DataFrame(data)
    if df.is_empty():
        return df

    # The "overall" endpoint reports every date once per mirror category
    # ("with_mirrors" / "without_mirrors"). Keep a single category so each
    # date appears exactly once; otherwise the two series get interleaved in
    # one line and pollute the rolling mean.
    if "category" in df.columns:
        df = df.filter(pl.col("category") == "without_mirrors")

    df = df.select(["date", "downloads"]).with_columns(
        pl.col("date").str.strptime(pl.Date, "%Y-%m-%d")
    )
    df = df.sort("date")
    df = df.with_columns(pl.lit(package).alias("package"))
    return df


def fetch_all():
    """Fetch stats for every package in ``PACKAGES`` and stack them.

    Packages whose frame is empty are skipped. A package whose downloads sum
    to zero has its ``downloads`` column replaced by a constant 1.

    Returns:
        One long frame with columns ``date``, ``downloads`` and ``package``;
        an empty frame with those column names when nothing was fetched.
    """
    dfs = []
    for pkg in PACKAGES:
        df = fetch_pypi_stats(pkg)
        if df.is_empty():
            continue
        # Ensure at least one nonzero download for rangeslider
        if df["downloads"].sum() == 0:
            # Pin the dtype to Int64: a bare integer literal is narrower than
            # the JSON-derived column, and a dtype mismatch would make the
            # vertical concat below fail.
            df = df.with_columns(pl.lit(1, dtype=pl.Int64).alias("downloads"))
        dfs.append(df)

    if not dfs:
        return pl.DataFrame(schema=["date", "downloads", "package"])
    return pl.concat(dfs)


def get_fig(df: pl.DataFrame, start_date=None, end_date=None, smooth=False, n_days=7):
    """Build the log-scale downloads line chart.

    Args:
        df: Long frame with ``date``, ``downloads`` and ``package`` columns.
        start_date: Lower bound of the visible x-range; applied only when
            ``end_date`` is also given.
        end_date: Upper bound of the visible x-range.
        smooth: When true, plot a per-package rolling mean instead of the raw
            daily values.
        n_days: Rolling-mean window in days. Coerced to an integer of at
            least 1, since UI sliders may deliver a float.

    Returns:
        A Plotly figure; a titled empty scatter when ``df`` has no rows.
    """
    if df.is_empty():
        return px.scatter(title="No data available")

    # Shift downloads by 1 to avoid log(0) issues
    df = df.with_columns((pl.col("downloads") + 1).alias("downloads"))

    if smooth:
        window = max(1, int(n_days))
        y = f"{window}-day avg"
        # The window is evaluated within each package so series do not bleed
        # into one another.
        df = df.with_columns(
            pl.col("downloads")
            .rolling_mean(window_size=window)
            .over("package")
            .alias(y)
        )
    else:
        y = "downloads"

    # Only pin the highlighted package; the others take the default colors.
    color_map = {"trackio": "red"}

    fig = px.line(
        df.to_pandas(),
        x="date",
        y=y,
        color="package",
        title=f"PyPI Downloads — {', '.join(PACKAGES)}",
        markers=True,
        log_y=True,
        color_discrete_map=color_map,
    )

    # Trackio stands out
    for trace in fig.data:
        if trace.name == "trackio":
            trace.line.width = 4
            trace.marker.symbol = "star"
            trace.marker.size = 10

    # Disable the range-slider preview below the x-axis.
    fig.update_layout(xaxis_rangeslider_visible=False)

    if start_date and end_date:
        fig.update_layout(xaxis_range=[start_date, end_date])

    fig.update_yaxes(title="Downloads per day (log scale)")
    fig.update_xaxes(title="Date")
    fig.update_layout(legend_title="Package")
    return fig


def update_fig(start_date, end_date, smooth, n_days):
    """Refetch all package stats and rebuild the figure for the UI inputs."""
    # NOTE: this hits the network once per package on every UI change.
    df = fetch_all()
    return get_fig(df, start_date, end_date, smooth, n_days)


def update_date_range(delta_days: int = 42):  # 42 days = 6 weeks
    """Return ``(start, end)`` where ``end`` is now (UTC) and ``start`` is
    ``delta_days`` days earlier."""
    today = datetime.datetime.now(datetime.timezone.utc)
    start_date = today - datetime.timedelta(days=delta_days)
    return start_date, today


# --- Gradio App ---
with gr.Blocks() as demo:
    gr.Markdown("## 📈 PyPI Downloads Comparison (trackio, wandb, neptune, comet-ml)")

    with gr.Row():
        start_date = gr.DateTime(label="Start date", type="datetime", include_time=False)
        end_date = gr.DateTime(label="End date", type="datetime", include_time=False)

    with gr.Row():
        smooth = gr.Checkbox(label="Show moving average", value=True)
        n_days = gr.Slider(label="Days", minimum=1, maximum=28, step=1, value=7)

    fig = gr.Plot(label="PyPI Downloads Comparison")

    # Populate the date pickers on page load.
    demo.load(fn=update_date_range, outputs=[start_date, end_date])

    # Redraw whenever any control changes.
    gr.on(
        triggers=[start_date.change, end_date.change, smooth.change, n_days.change],
        fn=update_fig,
        inputs=[start_date, end_date, smooth, n_days],
        outputs=fig,
    )

demo.launch()