import json
from typing import Any, Dict, List, Optional, Union

import gradio as gr
import httpx
from cachetools import TTLCache, cached
from gradio_client import Client
from toolz import groupby

CACHE_TIME = 60 * 60 * 1  # 1 hour

client = Client("https://librarian-bots-collection-papers-extractor.hf.space/")

def get_arxiv_ids_from_slug(
    slug: str,
) -> Dict[str, Union[None, Dict[str, Dict[str, List[str]]]]]:
    # call the collection-papers-extractor Space; it returns the path to a JSON file
    result = client.predict(slug, api_name="/predict")
    with open(result) as f:
        data = json.load(f)
    return data

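# Illustrative shape of the extractor's payload (the repo ids and arXiv ids below are
# made-up examples, and a top-level value may also be None); format_ids only relies on
# the nested "arxiv_ids" lists:
#   {
#       "model papers": {"org/some-model": {"arxiv_ids": ["2106.00001"]}},
#       "dataset papers": {"org/some-dataset": {"arxiv_ids": ["2203.00002"]}},
#       "papers": {"2305.00003": {"arxiv_ids": ["2305.00003"]}},
#   }
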
def format_arxiv_id_for_semantic_scholar(arxiv_id: str) -> str:
    return f"ArXiv:{arxiv_id}"

def format_ids(data, exclude_keys: Optional[list[str]] = None) -> list[str]:
    arxiv_ids = []
    if exclude_keys is not None:
        data = {k: v for k, v in data.items() if k not in exclude_keys}
        # check if the dict is now empty
        if not data:
            return []
    for repo in data.values():
        if repo is None:
            continue
        for item in repo.values():
            arxiv_ids.extend(item["arxiv_ids"])
    # format for Semantic Scholar
    return [format_arxiv_id_for_semantic_scholar(id) for id in arxiv_ids]

# Caching here is an assumption: cachetools is imported above but otherwise unused,
# and the tuple argument only matters for making the call hashable; maxsize is illustrative.
@cached(cache=TTLCache(maxsize=100, ttl=CACHE_TIME))
def get_recommendations_from_semantic_scholar(paper_ids: tuple[str, ...]):
    paper_ids = list(paper_ids)
    print(paper_ids)
    r = httpx.post(
        "https://api.semanticscholar.org/recommendations/v1/papers/",
        json={
            "positivePaperIds": paper_ids,
        },
        params={"fields": "externalIds,title,year", "limit": 10},
        timeout=30,
    )
    print(r.text)
    return r.json()

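# Illustrative response shape for the request above (values are made up); each
# recommended paper carries only the fields requested via the "fields" parameter:
#   {
#       "recommendedPapers": [
#           {"externalIds": {"ArXiv": "2106.00001"}, "title": "...", "year": 2021},
#           ...
#       ]
#   }
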
def is_arxiv_paper(recommendation: Dict[str, Any]) -> bool:
    return recommendation["externalIds"].get("ArXiv", None) is not None


def group_by_is_arxiv_paper(
    recommendations: List[Dict[str, Any]]
) -> Dict[bool, List[Dict[str, Any]]]:
    return groupby(is_arxiv_paper, recommendations)

def format_recommendation_into_markdown(
    grouped_recommendations: Dict[bool, List[Dict[str, Any]]]
):
    comment = "The following papers were recommended by the Semantic Scholar API \n\n"
    arxiv_papers = grouped_recommendations.get(True)
    if arxiv_papers:
        comment += "## Papers available on Hugging Face Papers:\n\n"
        for r in arxiv_papers:
            hub_paper_url = f"https://huggingface.co/papers/{r['externalIds']['ArXiv']}"
            comment += f"* [{r['title']}]({hub_paper_url}) ({r['year']})\n"
    other_papers = grouped_recommendations.get(False)
    if other_papers:
        comment += "\n\n## Other papers:\n\n"
        for r in other_papers:
            comment += f"* {r['title']} ({r['year']})\n"
    return comment

def map_repo_name_to_api_key(repo_name: str) -> str:
    return {
        "datasets": "dataset papers",
        "models": "model papers",
        "papers": "papers",
    }[repo_name]

def get_recommendations_from_slug(
    slug: str, excluded_repo_types: Optional[list[str]] = None
):
    # convert the list to a tuple (or None) so the argument is hashable for the cached
    # helper below; this also guards against an empty or missing selection from the UI
    excluded_repo_types = tuple(excluded_repo_types) if excluded_repo_types else None
    return _get_recommendations_from_slug(slug, excluded_repo_types=excluded_repo_types)

# Caching this helper is an assumption (the public wrapper exists only to pass a
# hashable tuple, which suggests it); the maxsize value is illustrative.
@cached(cache=TTLCache(maxsize=100, ttl=CACHE_TIME))
def _get_recommendations_from_slug(
    slug: str, excluded_repo_types: Optional[tuple[str, ...]] = None
):
    data = get_arxiv_ids_from_slug(slug)
    if excluded_repo_types:
        excluded_repo_types = [map_repo_name_to_api_key(k) for k in excluded_repo_types]
        print(f"excluded_repo_types_remapped={excluded_repo_types}")
    ids = format_ids(data, exclude_keys=excluded_repo_types)
    if not ids:
        return (
            "Based on your collection and exclusions"
            f" ({', '.join(excluded_repo_types or [])}), there are no papers to"
            " recommend. Try removing some excluded repo types or adding more items"
            " to your collection."
        )
    ids = tuple(ids)
    recommendations = get_recommendations_from_semantic_scholar(ids)
    recommendations = recommendations.get("recommendedPapers")
    if recommendations is None:
        raise gr.Error("Something went wrong with the Semantic Scholar API")
    grouped = group_by_is_arxiv_paper(recommendations)
    return format_recommendation_into_markdown(grouped)

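# Illustrative direct call (the slug is one of the demo examples below); the function
# returns a Markdown string like the one rendered in the UI:
#   get_recommendations_from_slug(
#       "merve/video-classification-models-6509edd0a6f657faa425e8c3",
#       excluded_repo_types=["datasets"],
#   )
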
title = """Collections Reading List Generator"""
description = """<img src="https://huggingface.co/datasets/librarian-bots/images/raw/main/Mascot%20Bookie.svg"
alt="Mascot Bookie" width="200" style="float:left; margin-right:20px; margin-bottom:20px;">
\n\n
Hugging Face Collections allow you to curate models, datasets, Spaces,
and papers from the Hugging Face Hub.

This Space will generate a reading list based on the items in your collection.
This can be a great way to find papers related to the models and datasets in your collection and to dive more deeply into a topic!

The Space works by:

- finding any papers in your collection
- finding papers related to the models and datasets in your collection
- requesting recommendations from the [Semantic Scholar API](https://api.semanticscholar.org/api-docs/recommendations#tag/Paper-Recommendations/operation/post_papers) for these papers.

You can optionally exclude certain repo types from consideration when generating the reading list.
"""

slug_input = gr.Textbox(
    lines=1,
    label="Collection Slug",
    placeholder="merve/video-classification-models-6509edd0a6f657faa425e8c3",
)

example_slugs = [
    ["merve/video-classification-models-6509edd0a6f657faa425e8c3", []],
    ["osanseviero/model-merging-65097893623330a3a51ead66", []],
    ["hf4h/clinical-language-models-64f9c1cd0cedc04f3caca264", []],
]

gr.Interface(
    get_recommendations_from_slug,
    inputs=[
        slug_input,
        gr.Dropdown(
            label="Repos to exclude from contributing to recommendations",
            choices=["datasets", "models", "papers"],
            multiselect=True,
        ),
    ],
    outputs="markdown",
    description=description,
    title=title,
    allow_flagging="never",
    examples=example_slugs,
).launch()