abidlabs HF Staff committed on
Commit
04d3991
·
verified ·
1 Parent(s): e3797ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -47
app.py CHANGED
@@ -1,56 +1,119 @@
 
 
 
 
 
1
  import gradio as gr
2
- import pandas as pd
3
- import requests
4
- from datetime import datetime, timedelta
5
-
6
- def fetch_pypi_stats(package_name):
7
- """Fetch PyPI stats for the past 12 weeks."""
8
- url = f"https://pypistats.org/api/packages/{package_name}/overall"
9
- resp = requests.get(url)
10
- if resp.status_code != 200:
11
- raise ValueError(f"Failed to fetch data for {package_name}")
12
-
13
- data = resp.json()["data"]
14
- df = pd.DataFrame(data)
15
- df["date"] = pd.to_datetime(df["date"])
16
- df["downloads"] = df["downloads"].astype(int)
17
-
18
- # Keep only last 12 weeks
19
- recent = df[df["date"] > datetime.now() - timedelta(weeks=12)]
20
- return recent
21
-
22
- def compare_downloads():
23
- """Fetch data for both packages and return a merged DataFrame for plotting."""
24
- trackio_df = fetch_pypi_stats("trackio")
25
- wandb_df = fetch_pypi_stats("wandb")
26
-
27
- trackio_df["package"] = "trackio"
28
- wandb_df["package"] = "wandb"
29
-
30
- df = pd.concat([trackio_df, wandb_df])
31
- df = df.sort_values("date")
32
-
33
  return df
34
 
35
- def plot_downloads():
36
- df = compare_downloads()
37
- return gr.LinePlot(
38
- df,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  x="date",
40
- y="downloads",
41
  color="package",
42
- title="PyPI Downloads (past 12 weeks)",
43
- tooltip=["package", "downloads", "date"],
44
- y_title="Downloads per Day",
45
- x_title="Date",
46
- height=500,
 
 
 
 
 
 
 
 
 
 
 
47
  )
48
 
49
- with gr.Blocks(title="PyPI Download Trends") as demo:
50
- gr.Markdown("# 📊 PyPI Download Trends for `trackio` vs `wandb`\nShows the past 12 weeks of download data.")
51
- plot_button = gr.Button("Fetch & Plot Latest Stats")
52
- plot_output = gr.LinePlot()
53
-
54
- plot_button.click(fn=lambda: compare_downloads(), outputs=plot_output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  demo.launch()
 
1
+ #!/usr/bin/env python3
2
+ import datetime
3
+ import httpx
4
+ import polars as pl
5
+ import plotly.express as px
6
  import gradio as gr
7
+
8
+ PACKAGES = ["trackio", "wandb", "neptune", "comet-ml"]
9
+
10
def fetch_pypi_stats(package: str) -> pl.DataFrame:
    """Fetch daily PyPI download counts for ``package`` from pypistats.org.

    The ``/overall`` endpoint reports each day once per ``category``
    (``with_mirrors`` and ``without_mirrors``). Only the ``without_mirrors``
    series is kept so that every date appears exactly once; keeping both would
    make the line plot zig-zag between the two series and would mix them in
    any rolling average.

    Args:
        package: Name of the project on PyPI.

    Returns:
        A frame with ``date`` (``pl.Date``), ``downloads`` and ``package``
        columns, sorted by date. The frame is empty when the API returned no
        usable rows.

    Raises:
        httpx.HTTPStatusError: If the server answers with an error status.
    """
    url = f"https://pypistats.org/api/packages/{package}/overall"
    # Ask the server for the mirror-free series only.
    r = httpx.get(url, params={"mirrors": "false"}, timeout=30.0)
    r.raise_for_status()
    data = r.json().get("data", [])
    df = pl.DataFrame(data)
    if df.is_empty():
        return df
    # Also filter client-side so the result stays one-row-per-date even if the
    # query parameter is ignored and both categories come back.
    if "category" in df.columns:
        df = df.filter(pl.col("category") == "without_mirrors")
    df = df.select(["date", "downloads"]).with_columns(
        pl.col("date").str.strptime(pl.Date, "%Y-%m-%d")
    )
    df = df.sort("date")
    df = df.with_columns(pl.lit(package).alias("package"))
    return df
25
 
26
def fetch_all():
    """Download stats for every entry in ``PACKAGES`` and stack them.

    Packages for which ``fetch_pypi_stats`` returns an empty frame are
    skipped. A package whose downloads sum to zero has its ``downloads``
    column replaced by the constant 1.

    Returns:
        One frame holding the rows of all non-empty packages, or an empty
        frame with ``date``, ``downloads`` and ``package`` columns when
        nothing could be collected.
    """
    frames = []
    for pkg in PACKAGES:
        df = fetch_pypi_stats(pkg)
        if df.is_empty():
            continue
        # Ensure at least one nonzero download for rangeslider
        if df["downloads"].sum() == 0:
            # Cast the placeholder to the column's existing dtype: a bare
            # ``pl.lit(1)`` need not share it, and the strict vertical concat
            # below rejects frames whose column dtypes differ.
            df = df.with_columns(
                pl.lit(1).cast(df.schema["downloads"]).alias("downloads")
            )
        frames.append(df)
    if not frames:
        return pl.DataFrame(schema=["date", "downloads", "package"])
    return pl.concat(frames)
38
+
39
def get_fig(df: pl.DataFrame, start_date=None, end_date=None, smooth=False, n_days=7):
    """Build the log-scale download comparison chart.

    Args:
        df: Frame with ``date``, ``downloads`` and ``package`` columns.
        start_date: Left edge of the visible x-range; only applied when
            ``end_date`` is given as well.
        end_date: Right edge of the visible x-range.
        smooth: Plot a per-package rolling mean instead of the raw counts.
        n_days: Rolling-mean window in rows. Coerced to an integer of at
            least 1 because UI sliders may deliver a float.

    Returns:
        A Plotly figure; a titled empty scatter when ``df`` has no rows.
    """
    if df.is_empty():
        return px.scatter(title="No data available")

    # Shift downloads by 1 to avoid log(0) issues
    df = df.with_columns((pl.col("downloads") + 1).alias("downloads"))

    if smooth:
        # The rolling window must be a positive integer.
        window = max(1, int(n_days))
        y = f"{window}-day avg"
        df = df.with_columns(
            pl.col("downloads")
            .rolling_mean(window_size=window)
            .over("package")
            .alias(y)
        )
    else:
        y = "downloads"

    color_map = {pkg: ("red" if pkg == "trackio" else None) for pkg in PACKAGES}

    fig = px.line(
        df.to_pandas(),
        x="date",
        y=y,
        color="package",
        title=f"PyPI Downloads {', '.join(PACKAGES)}",
        markers=True,
        log_y=True,
        color_discrete_map=color_map,
    )

    # Trackio stands out
    for trace in fig.data:
        if trace.name == "trackio":
            trace.line.width = 4
            trace.marker.symbol = "star"
            trace.marker.size = 10

    # Hide the range-slider preview strip under the x-axis.
    fig.update_layout(xaxis_rangeslider_visible=False)

    if start_date and end_date:
        fig.update_layout(xaxis_range=[start_date, end_date])

    fig.update_yaxes(title="Downloads per day (log scale)")
    fig.update_xaxes(title="Date")
    fig.update_layout(legend_title="Package")

    return fig
90
+
91
# How long a downloaded frame is reused before the stats are fetched again.
_FETCH_TTL_SECONDS = 3600.0
_fetch_cache = {"stamp": None, "frame": None}


def _fetch_all_cached():
    """Return ``fetch_all()``'s result, re-downloading at most once per TTL."""
    import time

    now = time.monotonic()
    stamp = _fetch_cache["stamp"]
    if stamp is not None and now - stamp <= _FETCH_TTL_SECONDS:
        return _fetch_cache["frame"]

    frame = fetch_all()
    # Never cache an empty result, so the next event retries the download.
    if not frame.is_empty():
        _fetch_cache["frame"] = frame
        _fetch_cache["stamp"] = now
    return frame


def update_fig(start_date, end_date, smooth, n_days):
    """Redraw the chart for the current control values.

    The underlying download data is cached for a while, so changing a control
    (for example dragging the slider) does not trigger a fresh round of HTTP
    requests for every package on each event.

    Args:
        start_date: Left edge of the visible x-range.
        end_date: Right edge of the visible x-range.
        smooth: Whether to plot the rolling mean.
        n_days: Rolling-mean window.

    Returns:
        The figure produced by ``get_fig``.
    """
    return get_fig(_fetch_all_cached(), start_date, end_date, smooth, n_days)
94
+
95
def update_date_range(delta_days: int = 42):  # 42 days = 6 weeks
    """Return the default date window ending now.

    Args:
        delta_days: Length of the window in days.

    Returns:
        A ``(start, end)`` tuple of timezone-aware UTC datetimes, where
        ``end`` is the current time and ``start`` lies ``delta_days`` days
        before it.
    """
    end = datetime.datetime.now(datetime.timezone.utc)
    start = end - datetime.timedelta(days=delta_days)
    return start, end
99
+
100
# --- Gradio App ---
with gr.Blocks() as demo:
    gr.Markdown("## 📈 PyPI Downloads Comparison (trackio, wandb, neptune, comet-ml)")

    # First row: the visible date window.
    with gr.Row():
        start_date = gr.DateTime(label="Start date", type="datetime", include_time=False)
        end_date = gr.DateTime(label="End date", type="datetime", include_time=False)

    # Second row: smoothing controls.
    with gr.Row():
        smooth = gr.Checkbox(label="Show moving average", value=True)
        n_days = gr.Slider(label="Days", minimum=1, maximum=28, step=1, value=7)

    fig = gr.Plot(label="PyPI Downloads Comparison")

    # Every control both triggers a redraw and is passed to it, in this order.
    controls = [start_date, end_date, smooth, n_days]

    # On page load only the date pickers are filled in.
    # NOTE(review): the first plot presumably appears because filling the
    # pickers fires their change events; confirm against Gradio's behavior.
    demo.load(fn=update_date_range, outputs=[start_date, end_date])
    gr.on(
        triggers=[control.change for control in controls],
        fn=update_fig,
        inputs=controls,
        outputs=fig,
    )

demo.launch()