Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
machineuser
committed on
Commit
·
9ce2247
1
Parent(s):
30c18c2
Sync widgets demo
Browse files
- packages/tasks/src/tasks/depth-estimation/data.ts +5 -3
- packages/tasks/src/tasks/document-question-answering/data.ts +8 -0
- packages/tasks/src/tasks/image-to-text/data.ts +8 -16
- packages/tasks/src/tasks/object-detection/data.ts +5 -2
- packages/tasks/src/tasks/text-to-image/data.ts +13 -11
- packages/tasks/src/tasks/text-to-video/data.ts +3 -3
- packages/tasks/src/tasks/visual-question-answering/data.ts +4 -0
packages/tasks/src/tasks/depth-estimation/data.ts
CHANGED
|
@@ -24,14 +24,16 @@ const taskData: TaskDataCustom = {
|
|
| 24 |
metrics: [],
|
| 25 |
models: [
|
| 26 |
{
|
| 27 |
-
// TO DO: write description
|
| 28 |
description: "Strong Depth Estimation model trained on 1.4 million images.",
|
| 29 |
id: "Intel/dpt-large",
|
| 30 |
},
|
| 31 |
{
|
| 32 |
-
// TO DO: write description
|
| 33 |
description: "Strong Depth Estimation model trained on the KITTI dataset.",
|
| 34 |
-
id: "
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
},
|
| 36 |
],
|
| 37 |
spaces: [
|
|
|
|
| 24 |
metrics: [],
|
| 25 |
models: [
|
| 26 |
{
|
|
|
|
| 27 |
description: "Strong Depth Estimation model trained on 1.4 million images.",
|
| 28 |
id: "Intel/dpt-large",
|
| 29 |
},
|
| 30 |
{
|
|
|
|
| 31 |
description: "Strong Depth Estimation model trained on the KITTI dataset.",
|
| 32 |
+
id: "facebook/dpt-dinov2-large-kitti",
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
description: "A strong monocular depth estimation model.",
|
| 36 |
+
id: "Bingxin/Marigold",
|
| 37 |
},
|
| 38 |
],
|
| 39 |
spaces: [
|
packages/tasks/src/tasks/document-question-answering/data.ts
CHANGED
|
@@ -50,6 +50,10 @@ const taskData: TaskDataCustom = {
|
|
| 50 |
description: "A special model for OCR-free Document QA task. Donut model fine-tuned on DocVQA.",
|
| 51 |
id: "naver-clova-ix/donut-base-finetuned-docvqa",
|
| 52 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
],
|
| 54 |
spaces: [
|
| 55 |
{
|
|
@@ -60,6 +64,10 @@ const taskData: TaskDataCustom = {
|
|
| 60 |
description: "An application that can answer questions from invoices.",
|
| 61 |
id: "impira/invoices",
|
| 62 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
],
|
| 64 |
summary:
|
| 65 |
"Document Question Answering (also known as Document Visual Question Answering) is the task of answering questions on document images. Document question answering models take a (document, question) pair as input and return an answer in natural language. Models usually rely on multi-modal features, combining text, position of words (bounding-boxes) and image.",
|
|
|
|
| 50 |
description: "A special model for OCR-free Document QA task. Donut model fine-tuned on DocVQA.",
|
| 51 |
id: "naver-clova-ix/donut-base-finetuned-docvqa",
|
| 52 |
},
|
| 53 |
+
{
|
| 54 |
+
description: "A powerful model for document question answering.",
|
| 55 |
+
id: "google/pix2struct-docvqa-large",
|
| 56 |
+
},
|
| 57 |
],
|
| 58 |
spaces: [
|
| 59 |
{
|
|
|
|
| 64 |
description: "An application that can answer questions from invoices.",
|
| 65 |
id: "impira/invoices",
|
| 66 |
},
|
| 67 |
+
{
|
| 68 |
+
description: "An application to compare different document question answering models.",
|
| 69 |
+
id: "merve/compare_docvqa_models",
|
| 70 |
+
},
|
| 71 |
],
|
| 72 |
summary:
|
| 73 |
"Document Question Answering (also known as Document Visual Question Answering) is the task of answering questions on document images. Document question answering models take a (document, question) pair as input and return an answer in natural language. Models usually rely on multi-modal features, combining text, position of words (bounding-boxes) and image.",
|
packages/tasks/src/tasks/image-to-text/data.ts
CHANGED
|
@@ -32,30 +32,22 @@ const taskData: TaskDataCustom = {
|
|
| 32 |
models: [
|
| 33 |
{
|
| 34 |
description: "A robust image captioning model.",
|
| 35 |
-
id: "Salesforce/
|
| 36 |
},
|
| 37 |
{
|
| 38 |
-
description: "A
|
| 39 |
-
id: "
|
| 40 |
},
|
| 41 |
{
|
| 42 |
description: "A strong optical character recognition model.",
|
| 43 |
-
id: "
|
| 44 |
-
},
|
| 45 |
-
{
|
| 46 |
-
description: "A strong visual question answering model for scientific diagrams.",
|
| 47 |
-
id: "google/pix2struct-ai2d-base",
|
| 48 |
-
},
|
| 49 |
-
{
|
| 50 |
-
description: "A strong captioning model for UI components.",
|
| 51 |
-
id: "google/pix2struct-widget-captioning-base",
|
| 52 |
-
},
|
| 53 |
-
{
|
| 54 |
-
description: "A captioning model for images that contain text.",
|
| 55 |
-
id: "google/pix2struct-textcaps-base",
|
| 56 |
},
|
| 57 |
],
|
| 58 |
spaces: [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
{
|
| 60 |
description: "A robust image captioning application.",
|
| 61 |
id: "flax-community/image-captioning",
|
|
|
|
| 32 |
models: [
|
| 33 |
{
|
| 34 |
description: "A robust image captioning model.",
|
| 35 |
+
id: "Salesforce/blip2-opt-2.7b",
|
| 36 |
},
|
| 37 |
{
|
| 38 |
+
description: "A powerful and accurate image-to-text model that can also localize concepts in images.",
|
| 39 |
+
id: "microsoft/kosmos-2-patch14-224",
|
| 40 |
},
|
| 41 |
{
|
| 42 |
description: "A strong optical character recognition model.",
|
| 43 |
+
id: "facebook/nougat-base",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
},
|
| 45 |
],
|
| 46 |
spaces: [
|
| 47 |
+
{
|
| 48 |
+
description: "An application that compares various image captioning models.",
|
| 49 |
+
id: "nielsr/comparing-captioning-models",
|
| 50 |
+
},
|
| 51 |
{
|
| 52 |
description: "A robust image captioning application.",
|
| 53 |
id: "flax-community/image-captioning",
|
packages/tasks/src/tasks/object-detection/data.ts
CHANGED
|
@@ -40,7 +40,6 @@ const taskData: TaskDataCustom = {
|
|
| 40 |
],
|
| 41 |
models: [
|
| 42 |
{
|
| 43 |
-
// TO DO: write description
|
| 44 |
description: "Solid object detection model trained on the benchmark dataset COCO 2017.",
|
| 45 |
id: "facebook/detr-resnet-50",
|
| 46 |
},
|
|
@@ -50,9 +49,13 @@ const taskData: TaskDataCustom = {
|
|
| 50 |
},
|
| 51 |
],
|
| 52 |
spaces: [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
{
|
| 54 |
description: "An object detection application that can detect unseen objects out of the box.",
|
| 55 |
-
id: "
|
| 56 |
},
|
| 57 |
{
|
| 58 |
description: "An application that contains various object detection models to try from.",
|
|
|
|
| 40 |
],
|
| 41 |
models: [
|
| 42 |
{
|
|
|
|
| 43 |
description: "Solid object detection model trained on the benchmark dataset COCO 2017.",
|
| 44 |
id: "facebook/detr-resnet-50",
|
| 45 |
},
|
|
|
|
| 49 |
},
|
| 50 |
],
|
| 51 |
spaces: [
|
| 52 |
+
{
|
| 53 |
+
description: "Leaderboard to compare various object detection models across several metrics.",
|
| 54 |
+
id: "hf-vision/object_detection_leaderboard",
|
| 55 |
+
},
|
| 56 |
{
|
| 57 |
description: "An object detection application that can detect unseen objects out of the box.",
|
| 58 |
+
id: "merve/owlv2",
|
| 59 |
},
|
| 60 |
{
|
| 61 |
description: "An application that contains various object detection models to try from.",
|
packages/tasks/src/tasks/text-to-image/data.ts
CHANGED
|
@@ -45,14 +45,12 @@ const taskData: TaskDataCustom = {
|
|
| 45 |
],
|
| 46 |
models: [
|
| 47 |
{
|
| 48 |
-
description:
|
| 49 |
-
|
| 50 |
-
id: "CompVis/stable-diffusion-v1-4",
|
| 51 |
},
|
| 52 |
{
|
| 53 |
-
description:
|
| 54 |
-
|
| 55 |
-
id: "dalle-mini/dalle-mega",
|
| 56 |
},
|
| 57 |
{
|
| 58 |
description: "A text-to-image model that can generate coherent text inside image.",
|
|
@@ -69,19 +67,23 @@ const taskData: TaskDataCustom = {
|
|
| 69 |
id: "stabilityai/stable-diffusion",
|
| 70 |
},
|
| 71 |
{
|
| 72 |
-
description: "
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
id: "DeepFloyd/IF",
|
| 74 |
},
|
| 75 |
{
|
| 76 |
-
description: "
|
| 77 |
-
id: "
|
| 78 |
},
|
| 79 |
{
|
| 80 |
-
description: "
|
| 81 |
id: "hysts/Shap-E",
|
| 82 |
},
|
| 83 |
{
|
| 84 |
-
description: "
|
| 85 |
id: "ArtGAN/Stable-Diffusion-ControlNet-WebUI",
|
| 86 |
},
|
| 87 |
],
|
|
|
|
| 45 |
],
|
| 46 |
models: [
|
| 47 |
{
|
| 48 |
+
description: "One of the most powerful image generation models that can generate realistic outputs.",
|
| 49 |
+
id: "stabilityai/stable-diffusion-xl-base-1.0",
|
|
|
|
| 50 |
},
|
| 51 |
{
|
| 52 |
+
description: "A powerful yet fast image generation model.",
|
| 53 |
+
id: "latent-consistency/lcm-lora-sdxl",
|
|
|
|
| 54 |
},
|
| 55 |
{
|
| 56 |
description: "A text-to-image model that can generate coherent text inside image.",
|
|
|
|
| 67 |
id: "stabilityai/stable-diffusion",
|
| 68 |
},
|
| 69 |
{
|
| 70 |
+
description: "A text-to-image application to generate comics.",
|
| 71 |
+
id: "jbilcke-hf/ai-comic-factory",
|
| 72 |
+
},
|
| 73 |
+
{
|
| 74 |
+
description: "A text-to-image application that can generate coherent text inside the image.",
|
| 75 |
id: "DeepFloyd/IF",
|
| 76 |
},
|
| 77 |
{
|
| 78 |
+
description: "A powerful yet very fast image generation application.",
|
| 79 |
+
id: "latent-consistency/lcm-lora-for-sdxl",
|
| 80 |
},
|
| 81 |
{
|
| 82 |
+
description: "A powerful text-to-image application that can generate 3D representations.",
|
| 83 |
id: "hysts/Shap-E",
|
| 84 |
},
|
| 85 |
{
|
| 86 |
+
description: "An application for `text-to-image`, `image-to-image` and image inpainting.",
|
| 87 |
id: "ArtGAN/Stable-Diffusion-ControlNet-WebUI",
|
| 88 |
},
|
| 89 |
],
|
packages/tasks/src/tasks/text-to-video/data.ts
CHANGED
|
@@ -68,7 +68,7 @@ const taskData: TaskDataCustom = {
|
|
| 68 |
models: [
|
| 69 |
{
|
| 70 |
description: "A strong model for video generation.",
|
| 71 |
-
id: "
|
| 72 |
},
|
| 73 |
{
|
| 74 |
description: "A robust model for text-to-video generation.",
|
|
@@ -76,7 +76,7 @@ const taskData: TaskDataCustom = {
|
|
| 76 |
},
|
| 77 |
{
|
| 78 |
description: "A text-to-video generation model with high quality and smooth outputs.",
|
| 79 |
-
id: "
|
| 80 |
},
|
| 81 |
],
|
| 82 |
spaces: [
|
|
@@ -86,7 +86,7 @@ const taskData: TaskDataCustom = {
|
|
| 86 |
},
|
| 87 |
{
|
| 88 |
description: "An application that generates video from image and text.",
|
| 89 |
-
id: "
|
| 90 |
},
|
| 91 |
{
|
| 92 |
description: "An application that generates videos from text and provides multi-model support.",
|
|
|
|
| 68 |
models: [
|
| 69 |
{
|
| 70 |
description: "A strong model for video generation.",
|
| 71 |
+
id: "Vchitect/LaVie",
|
| 72 |
},
|
| 73 |
{
|
| 74 |
description: "A robust model for text-to-video generation.",
|
|
|
|
| 76 |
},
|
| 77 |
{
|
| 78 |
description: "A text-to-video generation model with high quality and smooth outputs.",
|
| 79 |
+
id: "hotshotco/Hotshot-XL",
|
| 80 |
},
|
| 81 |
],
|
| 82 |
spaces: [
|
|
|
|
| 86 |
},
|
| 87 |
{
|
| 88 |
description: "An application that generates video from image and text.",
|
| 89 |
+
id: "Vchitect/LaVie",
|
| 90 |
},
|
| 91 |
{
|
| 92 |
description: "An application that generates videos from text and provides multi-model support.",
|
packages/tasks/src/tasks/visual-question-answering/data.ts
CHANGED
|
@@ -71,6 +71,10 @@ const taskData: TaskDataCustom = {
|
|
| 71 |
},
|
| 72 |
],
|
| 73 |
spaces: [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
{
|
| 75 |
description: "An application that can answer questions based on images.",
|
| 76 |
id: "nielsr/vilt-vqa",
|
|
|
|
| 71 |
},
|
| 72 |
],
|
| 73 |
spaces: [
|
| 74 |
+
{
|
| 75 |
+
description: "An application that compares visual question answering models across different tasks.",
|
| 76 |
+
id: "merve/pix2struct",
|
| 77 |
+
},
|
| 78 |
{
|
| 79 |
description: "An application that can answer questions based on images.",
|
| 80 |
id: "nielsr/vilt-vqa",
|