Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import joblib | |
| import spacy | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer | |
| from sklearn.preprocessing import MultiLabelBinarizer | |
| from sklearn.base import BaseEstimator, TransformerMixin | |
# spaCy English pipeline used by lemmatize().
nlp = spacy.load("en_core_web_sm")

# Pre-fitted artifacts stored next to this file, loaded in this order.
# NOTE: joblib.load unpickles arbitrary objects, so these files must come
# from a trusted source.
tfidf, model, tags_binarizer = (
    joblib.load(path)
    for path in ("./tfidf.joblib", "./model.joblib", "./tags.joblib")
)
def lemmatize(s: str) -> iter:
    """Lazily produce the lower-cased lemma of every meaningful token in ``s``.

    The text is run through the module-level spaCy pipeline ``nlp``. Tokens
    flagged as whitespace, punctuation, stop words or digits are dropped;
    for each remaining token its ``lemma_`` is lower-cased.

    Args:
        s: Raw text to process.

    Returns:
        A lazy ``map`` iterator of lemma strings; it can be consumed once.

    NOTE(review): the ``iter`` return annotation names the builtin function,
    not a type; ``Iterator[str]`` would be the accurate hint.
    """
    # Tokenization runs eagerly; filtering and lemmatizing stay lazy.
    doc = nlp(s)
    kept = (
        tok
        for tok in doc
        if not (tok.is_space or tok.is_punct or tok.is_stop or tok.is_digit)
    )
    return map(lambda tok: tok.lemma_.lower(), kept)
def plot(tags, proba):
    """Show a horizontal bar chart with one score bar per tag.

    Args:
        tags: Tag labels, one bar each, drawn top-to-bottom in the given order.
        proba: Scores paired with ``tags`` by position; the x axis is fixed
            to the range 0..1.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Both calls change matplotlib's global state, not just this figure.
    plt.style.use('dark_background')
    plt.rcParams.update({'font.size': 16})

    _fig, ax = plt.subplots(figsize=(12, 9))
    ax.barh(tags, proba, align='center', color='darkred')
    ax.set_yticks(tags, labels=tags)
    ax.invert_yaxis()  # first tag at the top
    ax.set_title('Score/Tag')
    ax.set_xlabel('Score')

    # Write each rounded score slightly left of the end of its bar.
    for row, score in enumerate(proba):
        label = str(round(score, 2))
        ax.text(score - 0.065, row + 0.05, label)

    ax.set_xlim(0, 1)
    plt.show()
def predict_words(X):
    """Return the tags predicted for ``X`` as one space-separated string.

    Args:
        X: Feature input accepted by ``model.predict`` (in this file, the
            output of ``tfidf.transform``).

    Returns:
        The labels that ``tags_binarizer.inverse_transform`` yields for the
        first row of the prediction, joined with single spaces.
    """
    predicted = model.predict(X)
    # Only the first row of the decoded prediction is used.
    first_row_tags = tags_binarizer.inverse_transform(predicted)[0]
    return " ".join(first_row_tags)
def proba_chart(X, min_score=0.1):
    """Build a horizontal bar chart of the tags scoring at least ``min_score``.

    Args:
        X: Feature input accepted by ``model.predict_proba`` (in this file,
            the output of ``tfidf.transform``). Only the first row of the
            result is charted.
        min_score: Lowest score a tag needs in order to be drawn. Defaults
            to 0.1.

    Returns:
        The matplotlib figure. Bars are ordered from highest to lowest score,
        top to bottom. When no tag reaches ``min_score`` the figure holds an
        empty chart with a short message instead of raising.
    """
    y_proba = model.predict_proba(X)[0]
    # NOTE(review): assumes the sorted keys of ``ts.count`` line up with the
    # columns of ``predict_proba`` -- confirm against how the model was fitted.
    tags = list(dict(sorted(tags_binarizer.ts.count.items())))

    # Keep the tags that pass the threshold, best score first. The sort is
    # stable, so tags with equal scores keep their alphabetical order.
    scored = [(tag, score) for tag, score in zip(tags, y_proba) if score >= min_score]
    scored.sort(key=lambda tag_value: tag_value[1], reverse=True)

    # Both calls change matplotlib's global state, not just this figure.
    plt.style.use('dark_background')
    plt.rcParams.update({'font.size': 16})
    fig, ax = plt.subplots(figsize=(12, 9))
    ax.set_xlabel('Score')
    ax.set_title('Score/Tag')

    if scored:
        tags, proba = zip(*scored)
        ax.barh(tags, proba, align='center', color='darkred')
        ax.set_yticks(tags, labels=tags)
        ax.invert_yaxis()  # labels read top-to-bottom
        # Write each rounded score slightly left of the end of its bar.
        for i, v in enumerate(proba):
            ax.text(v - 0.065, i + 0.05, str(round(v, 2)))
    else:
        # zip(*[]) yields nothing, so unpacking it into two names would raise
        # ValueError; draw an explicit empty state instead.
        ax.set_yticks([])
        ax.text(
            0.5,
            0.5,
            f'No tag scored at least {min_score}',
            ha='center',
            va='center',
            transform=ax.transAxes,
        )

    ax.set_xlim(0, 1)
    # Unregister the figure from pyplot so repeated calls do not accumulate
    # open figures; the returned Figure object can still be rendered.
    plt.close(fig)
    return fig
def predict(title: str, post: str):
    """Predict tags for a post and chart the per-tag scores.

    Args:
        title: Post title.
        post: Post body.

    Returns:
        A ``(words, chart)`` tuple: the space-separated predicted tags from
        ``predict_words`` and the figure from ``proba_chart``.
    """
    text = " ".join((title, post))
    lemmas = ' '.join(lemmatize(text))
    # One-element array: the vectorizer receives a single document.
    X = tfidf.transform(np.array([lemmas]))
    return predict_words(X), proba_chart(X)
# Two text inputs (title and post body) feed predict(); its two return values
# fill the tags textbox and the score plot, in that order.
_inputs = [
    gr.Textbox(label="Title", lines=1, placeholder="Title..."),
    gr.Textbox(label="Post", lines=20, placeholder="Post..."),
]
_outputs = [gr.Textbox(label="Tags"), gr.Plot()]

demo = gr.Interface(fn=predict, inputs=_inputs, outputs=_outputs)
demo.launch()