Spaces:
Sleeping
Sleeping
from functools import lru_cache, partial
from typing import Optional

import gradio as gr
import httpx
from cytoolz import groupby
from rich import print
def query_author(author_name: str):
    """Search Semantic Scholar for authors matching *author_name*.

    Returns a list of author records, each carrying name, url, external IDs
    and paper stubs (external IDs, title, year).

    Raises httpx.HTTPStatusError on a non-2xx response.
    """
    url = (
        "https://api.semanticscholar.org/graph/v1/author/search"
        f"?query={author_name}"
        "&fields=name,url,externalIds,papers.externalIds,papers.title,papers.year"
    )
    resp = httpx.get(url)
    resp.raise_for_status()
    # BUG FIX: the API can omit the "data" key when there are no matches;
    # default to an empty list instead of raising KeyError so the caller's
    # `if not potential_authors` check works.
    return resp.json().get("data", [])
def get_arxiv_paper(papers):
    """Return only the papers that carry a (truthy) ArXiv external ID."""
    return [
        paper
        for paper in papers
        if paper.get("externalIds") and paper["externalIds"].get("ArXiv")
    ]
def check_arxiv_in_papers(arxiv_ids, papers):
    """Return True if any paper's ArXiv ID is contained in *arxiv_ids*."""
    for paper in papers:
        external_ids = paper.get("externalIds")
        if not external_ids:
            continue
        arxiv_id = external_ids.get("ArXiv")
        if arxiv_id and arxiv_id in arxiv_ids:
            return True
    return False
def get_author_from_options(potential_authors, positive_arxiv_ids):
    """Pick the first candidate author who wrote one of the known papers.

    Returns None when no candidate's papers match any of *positive_arxiv_ids*.
    """
    wanted_ids = set(positive_arxiv_ids)
    for candidate in potential_authors:
        if check_arxiv_in_papers(wanted_ids, candidate["papers"]):
            return candidate
    return None
def sort_by_date(papers):
    """Return *papers* sorted newest-first by publication year.

    BUG FIX: Semantic Scholar sometimes reports ``year`` as None; comparing
    None with an int made ``sorted`` raise TypeError. Papers without a year
    are now treated as the oldest and sorted to the end.
    """
    def year_key(paper):
        year = paper.get("year")
        return year if year is not None else float("-inf")

    return sorted(papers, key=year_key, reverse=True)
def lookup_hf_paper(arxiv_id):
    """Fetch the Hugging Face paper-index record for *arxiv_id*.

    Returns the decoded JSON body; callers inspect its "error" key to tell
    whether the paper is indexed (no raise_for_status here on purpose).
    """
    response = httpx.get(f"https://huggingface.co/api/papers/{arxiv_id}")
    return response.json()
def check_if_index_hf_paper(paper):
    """True if the paper's ArXiv ID is already indexed on Hugging Face."""
    response = lookup_hf_paper(paper["externalIds"]["ArXiv"])
    return not response.get("error")
def groupby_indexed_by_hf_papers(papers):
    """Partition *papers* into {True: indexed-on-HF, False: not-indexed}.

    Like cytoolz.groupby, a key is present only when its group is non-empty.
    """
    grouped = {}
    for paper in papers:
        grouped.setdefault(check_if_index_hf_paper(paper), []).append(paper)
    return grouped
def check_hf_user_in_authors(paper, hf_user_name):
    """True if *hf_user_name* is among the paper's linked HF user accounts."""
    for author in paper["authors"]:
        user = author.get("user")
        if user and user["user"] == hf_user_name:
            return True
    return False
def groupby_hf_user_papers(papers, hf_user_name):
    """Partition indexed papers by whether *hf_user_name* has claimed them.

    Returns {True: claimed, False: unclaimed}; like cytoolz.groupby, a key
    is present only when its group is non-empty.
    """
    grouped = {}
    for paper in papers:
        claimed = check_hf_user_in_authors(paper, hf_user_name)
        grouped.setdefault(claimed, []).append(paper)
    return grouped
def get_papers(
    author_name: str, positive_arxiv_ids: str, hf_user_name: Optional[gr.OAuthProfile]
):
    """Build a Markdown report of papers the logged-in user can still claim.

    Parameters
    ----------
    author_name:
        Name to search for on Semantic Scholar.
    positive_arxiv_ids:
        Comma-separated ArXiv IDs known to belong to the user, used to pick
        the right author among same-named candidates.
    hf_user_name:
        Gradio OAuth profile of the logged-in user (None when logged out).

    Raises gr.Error when not logged in, no IDs are given, or no matching
    author is found.
    """
    if not hf_user_name:
        raise gr.Error("You must be logged in to use this Space")
    if not positive_arxiv_ids:
        raise gr.Error("You must enter at least one ArXiv ID")
    hf_user_name = hf_user_name.preferred_username
    # Strip whitespace around each comma-separated ID.
    arxiv_ids = [arxiv_id.strip() for arxiv_id in positive_arxiv_ids.split(",")]
    potential_authors = query_author(author_name)
    if not potential_authors:
        raise gr.Error("No authors found with that name")
    author = get_author_from_options(potential_authors, arxiv_ids)
    # BUG FIX: previously a non-matching candidate list returned None and the
    # next line crashed with TypeError; surface a clear error instead.
    if author is None:
        raise gr.Error("No author matching the supplied ArXiv IDs was found")
    papers = sort_by_date(get_arxiv_paper(author["papers"]))
    papers_indexed_by_hf = groupby_indexed_by_hf_papers(papers)
    # BUG FIX: use .get with a default — groupby omits a key when its group is
    # empty, so papers_indexed_by_hf[True] raised KeyError when no paper was
    # indexed (the False branch below already used .get correctly).
    indexed_papers = [
        lookup_hf_paper(paper["externalIds"]["ArXiv"])
        for paper in papers_indexed_by_hf.get(True, [])
    ]
    already_claimed = groupby_hf_user_papers(indexed_papers, hf_user_name)
    if already_claimed.get(False):
        results = (
            "# Papers already indexed by Hugging Face which you haven't claimed\n"
            + "These papers are already indexed by Hugging Face, but you haven't"
            " claimed them yet. You can claim them by clicking on the link to the"
            " paper and then clicking on your name in the author list.\n"
        )
        for paper in already_claimed[False]:
            url = f"https://huggingface.co/papers/{paper['id']}"
            results += f"- [{paper['title']}]({url})\n"
    else:
        results = "You have claimed all papers indexed by Hugging Face!\n"
    if papers_indexed_by_hf.get(False):
        results += "# Papers not yet indexed by Hugging Face which you can claim\n"
        for paper in papers_indexed_by_hf[False]:
            paper_title = paper["title"]
            arxiv_id = paper["externalIds"]["ArXiv"]
            url = f"https://huggingface.co/papers/{arxiv_id}"
            results += f"- [{paper_title}]({url})\n"
    return results
def get_name(hf_user_name: Optional[gr.OAuthProfile] = None):
    """Display name from the OAuth profile, or "" when nobody is logged in."""
    if hf_user_name:
        return hf_user_name.name
    return ""
# Gradio UI: login, author-name + ArXiv-ID inputs, and a button that renders
# the claimable-papers report produced by get_papers.
with gr.Blocks() as demo:
    # Page title.
    gr.HTML(
        "<h1 style='text-align:center;'> 📃 Hugging Face Paper Claimer 📃"
        " </h1>"
    )
    gr.HTML(
        """<div style='text-align:center;'>You can use this Space to help you find arXiv papers you can still claim.
You need to be logged in to use this Space.
Once you login your name will be prepopulated but you can change this if the name you publish under is different.</div>"""
    )
    # BUG FIX: corrected "Occasionaly" -> "Occasionally" in the user-facing note.
    gr.Markdown(
        "**NOTE** This Space uses the [Semantic Scholar"
        " API](https://www.semanticscholar.org/product/api) to find papers you have"
        " authored. Occasionally this API returns false positives i.e. papers which you"
        " did not author"
    )
    with gr.Row():
        gr.LoginButton(size="sm")
        gr.LogoutButton(size="sm")
    # Prepopulated from the OAuth profile via get_name, but editable in case
    # the publishing name differs from the HF account name.
    author_name = gr.Textbox(
        value=get_name,
        label="The name you publish under",
        interactive=True,
    )
    positive_arxiv_ids = gr.Textbox(
        placeholder="1910.01108",
        label=(
            "ArXiv ID for a paper for which you are an author, separate multiple IDs"
            " with commas"
        ),
        interactive=True,
    )
    btn = gr.Button("Get papers")
    # The OAuth profile is injected automatically by Gradio from the session;
    # only the two textboxes are explicit inputs.
    btn.click(get_papers, [author_name, positive_arxiv_ids], gr.Markdown())
demo.launch(debug=True)