Spaces:

mikachou
/

stackoverflow

Sleeping

App Files Files Community

mikachou committed on May 23, 2022

Commit

4b67ac0

1 Parent(s): d743c08

add chart with proba

Browse files

Files changed (1) hide show

app.py +68 -14

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import gradio as gr
 import joblib
 import spacy
 import numpy as np
 from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
 from sklearn.preprocessing import MultiLabelBinarizer
 from sklearn.base import BaseEstimator, TransformerMixin
@@ -21,31 +22,84 @@ def lemmatize(s: str) -> iter:
     # lemmatize
     return map(lambda token: token.lemma_.lower(), tokens)
-def predict(title: str , post: str, predict_proba: bool):
     text = title + " " + post
     lemmes = np.array([' '.join(list(lemmatize(text)))])
     X = tfidf.transform(lemmes)
-    if predict_proba:
-        y_proba = model.predict_proba(X)[0]
-        tags = list(dict(sorted(tags_binarizer.ts.count.items())).keys())
-        result = list(zip(tags, y_proba))
-    else:
-        y_bin = model.predict(X)
-        y_tags = tags_binarizer.inverse_transform(y_bin)
-        result = y_tags
-    return result
 demo = gr.Interface(
     fn=predict,
     inputs=[
         gr.Textbox(label="Title", lines=1, placeholder="Title..."),
-        gr.Textbox(label="Post", lines=10, placeholder="Post..."),
-        gr.Checkbox(label="Proba?")],
-    outputs=gr.Textbox(lines=10))
 demo.launch()

 import joblib
 import spacy
 import numpy as np
+import matplotlib.pyplot as plt
 from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
 from sklearn.preprocessing import MultiLabelBinarizer
 from sklearn.base import BaseEstimator, TransformerMixin
     # lemmatize
     return map(lambda token: token.lemma_.lower(), tokens)
+def plot(tags, proba):
+    plt.style.use('dark_background')
+    plt.rcParams.update({'font.size': 16})
+    fig, ax = plt.subplots(figsize=(12,9))
+    ax.barh(tags, proba, align='center', color='darkred')
+    ax.set_yticks(tags, labels=tags)
+    ax.invert_yaxis()  # labels read top-to-bottom
+    ax.set_xlabel('Score')
+    ax.set_title('Score/Tag')
+    for i, v in enumerate(proba):
+        ax.text(v - 0.065, i + 0.05, str(round(v, 2)))
+    plt.xlim(0, 1)
+    plt.show()
+def predict_words(X):
+    y_bin = model.predict(X)
+    y_tags = "    ".join(tags_binarizer.inverse_transform(y_bin)[0])
+    return y_tags
+def proba_chart(X):
+    y_proba = model.predict_proba(X)[0]
+    tags = list(dict(sorted(tags_binarizer.ts.count.items())).keys())
+    # combine
+    data = list(zip(tags, y_proba))
+    # sort
+    data = sorted(data, key=lambda tag_value: tag_value[1], reverse=True)
+    # keep values >= min_score
+    data = list(filter(lambda tag_value: tag_value[1] >= 0.1, data))
+    # we have our two dimensions for chart
+    tags, proba = zip(*data)
+    # build chart
+    plt.style.use('dark_background')
+    plt.rcParams.update({'font.size': 16})
+    fig, ax = plt.subplots(figsize=(12,9))
+    ax.barh(tags, proba, align='center', color='darkred')
+    ax.set_yticks(tags, labels=tags)
+    ax.invert_yaxis()  # labels read top-to-bottom
+    ax.set_xlabel('Score')
+    ax.set_title('Score/Tag')
+    for i, v in enumerate(proba):
+        ax.text(v - 0.065, i + 0.05, str(round(v, 2)))
+    plt.xlim(0, 1)
+    return fig
+def predict(title: str , post: str):
     text = title + " " + post
     lemmes = np.array([' '.join(list(lemmatize(text)))])
     X = tfidf.transform(lemmes)
+    # predicted words
+    words = predict_words(X)
+    # proba chart
+    chart = proba_chart(X)
+    return words, chart
 demo = gr.Interface(
     fn=predict,
     inputs=[
         gr.Textbox(label="Title", lines=1, placeholder="Title..."),
+        gr.Textbox(label="Post", lines=20, placeholder="Post...")],
+    outputs=[gr.Textbox(label="Tags"), gr.Plot()])
 demo.launch()