lilyhof committed
Commit d3dd2d6 · 1 Parent(s): e2769d5

Upload 38 files

Files changed (38)
  1. lo-achievement/.DS_Store +0 -0
  2. lo-achievement/.devcontainer/Dockerfile +66 -0
  3. lo-achievement/.devcontainer/devcontainer.json +25 -0
  4. lo-achievement/.github/workflows/deploy.yaml +14 -0
  5. lo-achievement/.github/workflows/test.yaml +7 -0
  6. lo-achievement/.gitignore +151 -0
  7. lo-achievement/LICENSE +201 -0
  8. lo-achievement/MANIFEST.in +5 -0
  9. lo-achievement/README.md +81 -0
  10. lo-achievement/UI_design_oral_exam_baseline_functionality.ipynb +1454 -0
  11. lo-achievement/UI_design_oral_exam_chatbot.ipynb +1004 -0
  12. lo-achievement/ai_classroom_suite/IOHelperUtilities.py +85 -0
  13. lo-achievement/ai_classroom_suite/MediaVectorStores.py +173 -0
  14. lo-achievement/ai_classroom_suite/PromptInteractionBase.py +191 -0
  15. lo-achievement/ai_classroom_suite/SelfStudyPrompts.py +75 -0
  16. lo-achievement/ai_classroom_suite/__init__.py +1 -0
  17. lo-achievement/ai_classroom_suite/_modidx.py +93 -0
  18. lo-achievement/ai_classroom_suite/self_study_app.py +358 -0
  19. lo-achievement/basic_UI_design_oral_exam.ipynb +0 -0
  20. lo-achievement/grading_from_json.ipynb +606 -0
  21. lo-achievement/instructor_intr_notebook.ipynb +3153 -0
  22. lo-achievement/instructor_intr_notebook_example_training.ipynb +1277 -0
  23. lo-achievement/instructor_intr_notebook_grading_training.ipynb +737 -0
  24. lo-achievement/instructor_vector_store_creator.ipynb +333 -0
  25. lo-achievement/nbs/_quarto.yml +20 -0
  26. lo-achievement/nbs/gradio_application.ipynb +0 -0
  27. lo-achievement/nbs/helper_utilities.ipynb +405 -0
  28. lo-achievement/nbs/media_stores.ipynb +920 -0
  29. lo-achievement/nbs/nbdev.yml +9 -0
  30. lo-achievement/nbs/prompt_interaction_base.ipynb +482 -0
  31. lo-achievement/nbs/self_study_prompts.ipynb +342 -0
  32. lo-achievement/nbs/styles.css +37 -0
  33. lo-achievement/prompt_with_context.ipynb +796 -0
  34. lo-achievement/prompt_with_vector_store.ipynb +637 -0
  35. lo-achievement/prompt_with_vector_store_w_grading_intr.ipynb +0 -0
  36. lo-achievement/settings.ini +43 -0
  37. lo-achievement/setup.py +57 -0
  38. lo-achievement/speech_to_text_models.ipynb +0 -0
lo-achievement/.DS_Store ADDED
Binary file (8.2 kB).
 
lo-achievement/.devcontainer/Dockerfile ADDED
@@ -0,0 +1,66 @@
+ # Use the official image as a parent image.
+ FROM ubuntu:22.04
+
+ # Update the system
+ RUN apt-get -y update && apt-get install -y bash \
+ build-essential \
+ wget \
+ make \
+ git \
+ curl \
+ sudo \
+ ca-certificates \
+ poppler-utils \
+ python3 \
+ python3-pip \
+ nodejs \
+ npm && \
+ rm -rf /var/lib/apt/lists/*
+
+ # Install python packages
+ RUN python3 -m pip install --no-cache-dir --upgrade pip && \
+ python3 -m pip install --no-cache-dir torch torchvision torchaudio && \
+ python3 -m pip install --no-cache-dir \
+ transformers \
+ langchain \
+ langchain[llms] \
+ pypdf \
+ tokenizers \
+ sentencepiece \
+ openai \
+ librosa \
+ datasets \
+ jupyter \
+ jupyterlab \
+ pandas \
+ openpyxl \
+ matplotlib \
+ numpy \
+ seaborn \
+ chromadb \
+ tiktoken \
+ unstructured \
+ deeplake \
+ ipyfilechooser \
+ ipywidgets \
+ widgetsnbextension \
+ jupyter_contrib_nbextensions \
+ jupyter_nbextensions_configurator \
+ jupyterlab-git \
+ gradio \
+ streamlit \
+ nb-clean \
+ nbdime \
+ nbdev
+
+ # Enable jupyter nbextension
+ # RUN jupyter nbextension enable --py widgetsnbextension
+
+ # Run nbdev_install_quarto
+ RUN sudo nbdev_install_quarto
+
+ # Indicate the Dockerfile owner
+ LABEL maintainer="Charreau Bell"
+
+ # Command to run on container start
+ CMD [ "/bin/bash" ]
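
The Dockerfile above bakes the project's Python stack (PyTorch, transformers, LangChain, ChromaDB, Gradio, nbdev, and friends) into the dev container. As a quick way to confirm the image built as intended, a minimal smoke test along the following lines can be run inside the container; the script and its output are illustrative only (not part of this commit), and the package list is taken from the `pip install` step above.

```python
# smoke_test.py - illustrative check (not part of this commit) that the packages
# installed by the Dockerfile above import cleanly inside the container.
from importlib import import_module
from importlib.metadata import version, PackageNotFoundError

# Distribution names taken from the Dockerfile's pip install step
packages = [
    "torch", "transformers", "langchain", "openai", "chromadb",
    "tiktoken", "gradio", "datasets", "librosa", "nbdev",
]

for name in packages:
    try:
        import_module(name)                              # can we import it?
        print(f"{name:<12} OK  (version {version(name)})")
    except (ImportError, PackageNotFoundError) as exc:
        print(f"{name:<12} FAILED: {exc}")
```

If any entry prints FAILED, the corresponding `pip install` line in the Dockerfile is the place to look.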
lo-achievement/.devcontainer/devcontainer.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "name": "CLAS Container",
+   "dockerFile": "Dockerfile",
+   //"forwardPorts": [80, 443, 8080],
+   "customizations": {
+     "vscode": {
+       "settings": {
+         "terminal.integrated.shell.linux": "/bin/bash"
+       },
+       "extensions": [
+         "ms-python.python",
+         "ms-azuretools.vscode-docker",
+         "ms-vscode-remote.remote-containers",
+         "ms-toolsai.jupyter",
+         "ms-toolsai.vscode-jupyter-cell-tags",
+         "ms-toolsai.jupyter-renderers",
+         "ms-python.vscode-pylance",
+         "ms-toolsai.jupyter-keymap"
+       ]
+     }
+   }
+   // "mounts": [
+   //   "source=/host/path,target=/container/path,type=bind"
+   //],
+ }
lo-achievement/.github/workflows/deploy.yaml ADDED
@@ -0,0 +1,14 @@
+ name: Deploy to GitHub Pages
+
+ permissions:
+   contents: write
+   pages: write
+
+ on:
+   push:
+     branches: [ "main", "master" ]
+   workflow_dispatch:
+ jobs:
+   deploy:
+     runs-on: ubuntu-latest
+     steps: [uses: fastai/workflows/quarto-ghp@master]
lo-achievement/.github/workflows/test.yaml ADDED
@@ -0,0 +1,7 @@
+ name: CI
+ on: [workflow_dispatch, pull_request, push]
+
+ jobs:
+   test:
+     runs-on: ubuntu-latest
+     steps: [uses: fastai/workflows/nbdev-ci@master]
lo-achievement/.gitignore ADDED
@@ -0,0 +1,151 @@
+ _docs/
+ _proc/
+
+ *.bak
+ .gitattributes
+ .last_checked
+ .gitconfig
+ *.bak
+ *.log
+ *~
+ ~*
+ _tmp*
+ tmp*
+ tags
+ *.pkg
+
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ env/
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ .hypothesis/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # pyenv
+ .python-version
+
+ # celery beat schedule file
+ celerybeat-schedule
+
+ # SageMath parsed files
+ *.sage.py
+
+ # dotenv
+ .env
+
+ # virtualenv
+ .venv
+ venv/
+ ENV/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+
+ .vscode
+ *.swp
+
+ # osx generated files
+ .DS_Store
+ .DS_Store?
+ .Trashes
+ ehthumbs.db
+ Thumbs.db
+ .idea
+
+ # pytest
+ .pytest_cache
+
+ # tools/trust-doc-nbs
+ docs_src/.last_checked
+
+ # symlinks to fastai
+ docs_src/fastai
+ tools/fastai
+
+ # link checker
+ checklink/cookies.txt
+
+ # .gitconfig is now autogenerated
+ .gitconfig
+
+ # Quarto installer
+ .deb
+ .pkg
+
+ # Quarto
+ .quarto
lo-achievement/LICENSE ADDED
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright 2022, fastai
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
lo-achievement/MANIFEST.in ADDED
@@ -0,0 +1,5 @@
+ include settings.ini
+ include LICENSE
+ include CONTRIBUTING.md
+ include README.md
+ recursive-exclude * __pycache__
lo-achievement/README.md ADDED
@@ -0,0 +1,81 @@
+ # Classroom Learning and Assessment Suite (CLAS)
+
+ The Classroom Learning and Assessment Suite is a set of AI-driven solutions that complement in-class learning with interactive study and assessment. Primarily ideated by faculty interested in leveraging AI platforms to enhance student learning, the suite gives both students and faculty new mechanisms for supporting high-quality learning assessments and for streamlining the grading of student submissions using generative AI-based platforms.
+
+ ## Features
+ CLAS is suitable for users of diverse programming backgrounds, from no programming background to seasoned programmers. The suite provides:
+
+ ### **Prompting Guides**
+
+ * **[A curated prompt dictionary for self-study](https://github.com/vanderbilt-data-science/lo-achievement/wiki/Guide-to-Learning-Objective-Prompts)**: A set of prompts that have been engineered and evaluated on OpenAI GPT-3.5 and GPT-4 to provide numerous types of assessments for students engaging in self-study with instructor-provided materials
+ * **[A curated prompt dictionary for student assessment](https://github.com/vanderbilt-data-science/lo-achievement/wiki/Bloom's-Taxonomy-Rubric-Prompts)**: Prompts targeted primarily towards instructors who want a generalized rubric for evaluating student submissions
+
+ ### **Interface-Driven Programs**
+ No programming experience? No problem.
+
+ * **[Student self-study using instructor-assigned resources](https://huggingface.co/spaces/vanderbilt-dsi/selfstudy_learningobjectives_demo)**: A point-and-click interface with OpenAI generative AI. Upload your contextual coursework (book chapters, etc.), and chat with your own customized tutor!
+ * **[Hosted self- and in-class oral exam app]()**: Interested in using oral exams to assess student knowledge? Students: want to prepare for oral exams? Visit our point-and-click interface with OpenAI generative AI using your own coursework. You can upload questions provided by the instructor or ask the generative AI to assist in creating questions for you.
+
+ ### **Google Colab Notebooks**
+ Want to evaluate students or customize your approach for interacting with generative AI?
+
+ * **[Instructor Grading Notebook](https://github.com/vanderbilt-data-science/lo-achievement/blob/main/instructor_intr_notebook.ipynb)**: Upload a zip file of the JSON output of your students' exploration and assessment, and get insight into student strengths and weaknesses on the topic as well as structured feedback for all students.
+ * **[Instructor Document Store Creation](https://github.com/vanderbilt-data-science/lo-achievement/blob/main/instructor_vector_store_creator.ipynb)**: Store all of your classroom content (PDFs, YouTube videos, website links) in a hosted location for easy accessibility for students working with generative AI platforms.
+ * **[Prompting with Inline Context](https://github.com/vanderbilt-data-science/lo-achievement/blob/main/prompt_with_context.ipynb)**: A customizable, programmatic way to interface with generative AI using Google Colab through direct copy/paste of text content
+ * **[Prompting with Vector Stores](https://github.com/vanderbilt-data-science/lo-achievement/blob/main/prompt_with_vector_store.ipynb)**: A customizable, programmatic way to self-study with generative AI using Google Colab through the creation of vector stores (better for larger corpora of text)
+
+ **Use the repo!**
+ You can also directly clone/use the repo itself or use it as a package for development; this is great for experienced programmers or even those who would like to learn more about development with generative AI. The repo provides:
+ - Generative AI and LangChain integration to process sources and create assessments and answer keys
+ - Runs on Google Colab, with no additional installations needed
+ You can also develop locally as desired, and we encourage PR contributions from the community!
+
+ ## Getting Started
+
+ There are a variety of ways to use CLAS:
+ * **Navigate to the Wiki to explore prompts.** You can copy/paste/amend these in the interfaces provided by OpenAI, Google, Anthropic, etc.
+ * **Use Google Colab to interact with notebooks.** Click the notebook you'd like to open in the files list above. You will see a blue Open In Colab badge on the page that opens. Click this button to start your session in Google Colab, making sure that you're logged in with your Google Account. It will take a few minutes to spin up and automatically install the required packages.
+ * **Study through our hosted app.** Navigate to [CLAS on Huggingface](https://huggingface.co/spaces/vanderbilt-dsi/selfstudy_learningobjectives_demo). Follow the instructions to use the platform for self-study.
+
+ ## Contributing
+
+ To contribute to the project, please fork the repository and submit a pull request. Our community is supportive, and we provide training and classes if you're new to any of the frameworks used in the project. Everyone is welcome to contribute, as we believe participating in data science and AI projects is an excellent way to learn.
+
+ ## Community Guidelines
+
+ We aim to create a welcoming and inclusive community where everyone can feel comfortable and valued, regardless of skill level, background, ability, or identity. To ensure a positive atmosphere, please adhere to our code of conduct and community guidelines.
+
+ ## Meetings
+
+ - Sprint Planning & Retrospective: Mondays and Fridays at 10:30 am
+ - Demos: Fridays at 3 pm
+
+ ## Additional Resources
+
+ - LangChain documentation
+ - Introduction to transformers and generative AI on our [YouTube channel](https://www.youtube.com/channel/UC8C2_3L5gR9qLmL7rmb2BdQ)
+ - AI Summer and AI Winter sessions (free and open to all)
+
+ ## Reporting Issues
+
+ If you encounter a bug, please submit an issue and label it with "Bug." To escalate the issue, email [[email protected]](mailto:[email protected]).
+
+ ## Contact Information
+
+ - Organization: Data Science Institute at Vanderbilt University
+ - Program: Data Science for Social Good
+ - Main Email: [[email protected]](mailto:[email protected])
+ - Principal Investigators (PIs)
+   - Jesse Spencer-Smith, Ph.D., Chief Data Scientist, Data Science Institute, Vanderbilt University
+   - Jesse Blocher, Ph.D., Director of Graduate Studies, Data Science Institute, Vanderbilt University
+   - Dr. Yaa Kumah-Crystal, Ph.D., Pediatric Endocrinologist and Professor, Vanderbilt University Medical Center
+   - Charreau Bell, Ph.D., Senior Data Scientist, Data Science Institute, Vanderbilt University
+ - Staff Lead: [[email protected]](mailto:[email protected])
+ - Code Developers:
+   - Katrina Rbeiz, Ph.D. Student, Psychology, Vanderbilt University
+   - Minwoo Sohn, Graduate Student, Data Science, Vanderbilt University
+   - Ricky Sun, Graduate Student, Data Science, Vanderbilt University
+   - Eleanor Beers, Graduate Student, Data Science, Vanderbilt University
+   - Kevin Chen, Undergraduate, Computer Science, Vanderbilt University
+   - Adam Levav, Undergraduate, University of Maryland
+   - Varun Koduvayur, Undergraduate
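
The "Prompting with Vector Stores" and self-study workflows described in this README correspond to notebooks and modules elsewhere in this commit (e.g., `ai_classroom_suite/MediaVectorStores.py`). For orientation, the sketch below shows the general shape of that retrieve-then-ask flow using the 2023-era LangChain API that the notebooks import; it is an illustrative example rather than the project's actual code, the file name and question are placeholders, and it assumes `OPENAI_API_KEY` is set in the environment.

```python
# Illustrative sketch only (not part of this commit): a minimal vector-store
# self-study loop of the kind the "Prompting with Vector Stores" notebook describes.
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA

# Hypothetical course material; any plain-text file would do.
with open("course_notes.txt") as f:
    raw_text = f.read()

# Split the source into chunks and embed them into a Chroma vector store.
chunks = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100).split_text(raw_text)
store = Chroma.from_texts(chunks, OpenAIEmbeddings())

# Ask study questions grounded in the retrieved context.
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0.0),
    retriever=store.as_retriever(),
)
print(qa.run("Write one multiple-choice question about this material and give the answer."))
```

The notebooks in this commit layer instructor prompts, grading output, and a Gradio front end on top of this basic loop.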
lo-achievement/UI_design_oral_exam_baseline_functionality.ipynb ADDED
@@ -0,0 +1,1454 @@
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "gpuType": "V100",
8
+ "include_colab_link": true
9
+ },
10
+ "kernelspec": {
11
+ "name": "python3",
12
+ "display_name": "Python 3"
13
+ },
14
+ "language_info": {
15
+ "name": "python"
16
+ }
17
+ },
18
+ "cells": [
19
+ {
20
+ "cell_type": "markdown",
21
+ "metadata": {
22
+ "id": "view-in-github",
23
+ "colab_type": "text"
24
+ },
25
+ "source": [
26
+ "<a href=\"https://colab.research.google.com/github/vanderbilt-data-science/lo-achievement/blob/124-implement-baseline-functionality-for-oral-exam-module/UI_design_oral_exam_baseline_functionality.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "markdown",
31
+ "source": [
32
+ "# Project IO Achievement - UI Design (Oral Exam)"
33
+ ],
34
+ "metadata": {
35
+ "id": "PIbogPXyM0wr"
36
+ }
37
+ },
38
+ {
39
+ "cell_type": "markdown",
40
+ "source": [
41
+ "## Problem Definition\n",
42
+ "\n",
43
+ "The v1 functionality for the Oral Exam module requires the following:\n",
44
+ "\n",
45
+ "1. Upload or generation of questions: either the user should upload a set of questions or we should allow the model to generate the questions. The user should pick or it should be inherent if there is no upload of questions. Note that we must also allow for context to be uploaded (vector store, vector store link, specific documents)\n",
46
+ "2. The model should prompt the user with a question and pause.\n",
47
+ "The user should respond by audio.\n",
48
+ "3. This should continue on until some final point where the exam is over.\n",
49
+ "\n",
50
+ "Then:\n",
51
+ "\n",
52
+ "1. We should use Whisper to do the transcription, and\n",
53
+ "2. Send the transcription, questions, and context for GPT4 for evaluation\n",
54
+ "Return the evaluation.\n",
55
+ "3. This will primarily be work on a user interface."
56
+ ],
57
+ "metadata": {
58
+ "id": "x_Vp8SiKM4p1"
59
+ }
60
+ },
61
+ {
62
+ "cell_type": "markdown",
63
+ "source": [
64
+ "## Libraries\n",
65
+ "\n",
66
+ "This section will install and import some important libraries such as Langchain, openai, Gradio, and so on"
67
+ ],
68
+ "metadata": {
69
+ "id": "o_60X8H3NEne"
70
+ }
71
+ },
72
+ {
73
+ "cell_type": "code",
74
+ "source": [
75
+ "# install libraries here\n",
76
+ "# -q flag for \"quiet\" install\n",
77
+ "%%capture\n",
78
+ "!pip install -q langchain\n",
79
+ "!pip install -q openai\n",
80
+ "!pip install -q gradio\n",
81
+ "!pip install -q transformers\n",
82
+ "!pip install -q datasets\n",
83
+ "!pip install -q huggingsound\n",
84
+ "!pip install -q torchaudio\n",
85
+ "!pip install -q git+https://github.com/openai/whisper.git\n",
86
+ "!pip install -q docx\n",
87
+ "!pip install -q PyPDF2\n",
88
+ "!pip install python-docx"
89
+ ],
90
+ "metadata": {
91
+ "id": "pxcqXgg2aAN7"
92
+ },
93
+ "execution_count": 1,
94
+ "outputs": []
95
+ },
96
+ {
97
+ "cell_type": "code",
98
+ "execution_count": 2,
99
+ "metadata": {
100
+ "id": "pEjM1tLsMZBq"
101
+ },
102
+ "outputs": [],
103
+ "source": [
104
+ "# import libraries here\n",
105
+ "from langchain.llms import OpenAI\n",
106
+ "from langchain.prompts import PromptTemplate\n",
107
+ "from langchain.document_loaders import TextLoader\n",
108
+ "from langchain.indexes import VectorstoreIndexCreator\n",
109
+ "from langchain import ConversationChain, LLMChain, PromptTemplate\n",
110
+ "from langchain.chat_models import ChatOpenAI\n",
111
+ "from langchain.memory import ConversationBufferWindowMemory\n",
112
+ "from langchain.prompts import ChatPromptTemplate\n",
113
+ "from langchain.text_splitter import CharacterTextSplitter\n",
114
+ "from langchain.embeddings import OpenAIEmbeddings\n",
115
+ "from langchain.schema import SystemMessage, HumanMessage, AIMessage\n",
116
+ "import openai\n",
117
+ "import os\n",
118
+ "from getpass import getpass\n",
119
+ "from IPython.display import display, Javascript, HTML\n",
120
+ "from google.colab.output import eval_js\n",
121
+ "from base64 import b64decode\n",
122
+ "import ipywidgets as widgets\n",
123
+ "from IPython.display import clear_output\n",
124
+ "import time\n",
125
+ "import requests\n",
126
+ "from transformers import WhisperProcessor, WhisperForConditionalGeneration\n",
127
+ "from datasets import load_dataset\n",
128
+ "# from torchaudio.transforms import Resample\n",
129
+ "import whisper\n",
130
+ "from huggingsound import SpeechRecognitionModel\n",
131
+ "import numpy as np\n",
132
+ "import torch\n",
133
+ "import librosa\n",
134
+ "from datasets import load_dataset\n",
135
+ "from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor\n",
136
+ "from jiwer import wer\n",
137
+ "import pandas as pd\n",
138
+ "from IPython.display import display, HTML\n",
139
+ "import gradio as gr\n",
140
+ "from transformers import pipeline\n",
141
+ "from docx import Document\n",
142
+ "import PyPDF2\n",
143
+ "from pydub import AudioSegment\n",
144
+ "import tempfile\n",
145
+ "import os\n"
146
+ ]
147
+ },
148
+ {
149
+ "cell_type": "markdown",
150
+ "source": [
151
+ "## API Keys\n",
152
+ "\n",
153
+ "Use these cells to load the API keys required for this notebook. The below code cell uses the `getpass` library."
154
+ ],
155
+ "metadata": {
156
+ "id": "03KLZGI_a5W5"
157
+ }
158
+ },
159
+ {
160
+ "cell_type": "code",
161
+ "source": [
162
+ "openai_api_key = getpass()\n",
163
+ "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n",
164
+ "openai.api_key = openai_api_key"
165
+ ],
166
+ "metadata": {
167
+ "id": "5smcWj4DbFgy",
168
+ "outputId": "9a73707b-1a6a-4253-b7d8-181a82b1040f",
169
+ "colab": {
170
+ "base_uri": "https://localhost:8080/"
171
+ }
172
+ },
173
+ "execution_count": 3,
174
+ "outputs": [
175
+ {
176
+ "name": "stdout",
177
+ "output_type": "stream",
178
+ "text": [
179
+ "··········\n"
180
+ ]
181
+ }
182
+ ]
183
+ },
184
+ {
185
+ "cell_type": "markdown",
186
+ "source": [
187
+ "## Prompt Design\n",
188
+ "\n",
189
+ "To be added"
190
+ ],
191
+ "metadata": {
192
+ "id": "pMo9x8u4AEV1"
193
+ }
194
+ },
195
+ {
196
+ "cell_type": "code",
197
+ "source": [
198
+ "chat = ChatOpenAI(temperature=0.0, model_name='gpt-3.5-turbo-16k')\n",
199
+ "chat"
200
+ ],
201
+ "metadata": {
202
+ "colab": {
203
+ "base_uri": "https://localhost:8080/"
204
+ },
205
+ "id": "UgnCZRMhADvo",
206
+ "outputId": "1bd6b84d-3ea8-49ba-8156-701f4155d69c"
207
+ },
208
+ "execution_count": 4,
209
+ "outputs": [
210
+ {
211
+ "output_type": "execute_result",
212
+ "data": {
213
+ "text/plain": [
214
+ "ChatOpenAI(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-3.5-turbo-16k', temperature=0.0, model_kwargs={}, openai_api_key='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', openai_api_base='', openai_organization='', openai_proxy='', request_timeout=None, max_retries=6, streaming=False, n=1, max_tokens=None, tiktoken_model_name=None)"
215
+ ]
216
+ },
217
+ "metadata": {},
218
+ "execution_count": 4
219
+ }
220
+ ]
221
+ },
222
+ {
223
+ "cell_type": "code",
224
+ "source": [
225
+ "# This is what I used to test the function 'generate_questions'\n",
226
+ "template_string2 = \"\"\"\n",
227
+ "You are teacher, and you will be given a {context} that is related to the presentation topic.\n",
228
+ "\n",
229
+ "Please generate a questions based on the context above and transcript that student created . \\\n",
230
+ "\n",
231
+ "The audio file generated by student is shown below: {transcribed_text}. \\\n",
232
+ "\"\"\""
233
+ ],
234
+ "metadata": {
235
+ "id": "WmysQZAhKBli"
236
+ },
237
+ "execution_count": null,
238
+ "outputs": []
239
+ },
240
+ {
241
+ "cell_type": "code",
242
+ "source": [
243
+ "prompt_template1 = ChatPromptTemplate.from_template(template_string2)"
244
+ ],
245
+ "metadata": {
246
+ "id": "oij6W5rwAaGb"
247
+ },
248
+ "execution_count": null,
249
+ "outputs": []
250
+ },
251
+ {
252
+ "cell_type": "code",
253
+ "source": [
254
+ "# prompt_template.messages[0].prompt\n",
255
+ "prompt_template1.messages[0].prompt.input_variables"
256
+ ],
257
+ "metadata": {
258
+ "colab": {
259
+ "base_uri": "https://localhost:8080/"
260
+ },
261
+ "id": "C1YRmL46AaJA",
262
+ "outputId": "e850524a-3831-4113-c796-5a0ec8584569"
263
+ },
264
+ "execution_count": null,
265
+ "outputs": [
266
+ {
267
+ "output_type": "execute_result",
268
+ "data": {
269
+ "text/plain": [
270
+ "['context', 'transcribed_text']"
271
+ ]
272
+ },
273
+ "metadata": {},
274
+ "execution_count": 25
275
+ }
276
+ ]
277
+ },
278
+ {
279
+ "cell_type": "code",
280
+ "source": [
281
+ "# This template is used for testing the function 'ai_evaluate'\n",
282
+ "# Detailed evaluation metrics are to be added\n",
283
+ "template_string3 = \"\"\"\n",
284
+ "You are teacher, and you will be given a context that is related to the presentation topic. \\\n",
285
+ "Now, given {context}, evaluate the answer based on the accuracy\n",
286
+ "\n",
287
+ "The main answer generated by student is shown below: {transcribed_text}. \\\n",
288
+ "The questions are shown below: {questions}. \\\n",
289
+ "The questions answered by student is shown below: {transcribed_qa}. \\\n",
290
+ "\"\"\"\n",
291
+ "prompt_template2 = ChatPromptTemplate.from_template(template_string3)\n",
292
+ "prompt_template2.messages[0].prompt.input_variables\n"
293
+ ],
294
+ "metadata": {
295
+ "id": "141Cxa2MT-l7",
296
+ "colab": {
297
+ "base_uri": "https://localhost:8080/"
298
+ },
299
+ "outputId": "0374d45d-a7f6-41e7-aed0-44c61681de21"
300
+ },
301
+ "execution_count": null,
302
+ "outputs": [
303
+ {
304
+ "output_type": "execute_result",
305
+ "data": {
306
+ "text/plain": [
307
+ "['context', 'questions', 'transcribed_qa', 'transcribed_text']"
308
+ ]
309
+ },
310
+ "metadata": {},
311
+ "execution_count": 7
312
+ }
313
+ ]
314
+ },
315
+ {
316
+ "cell_type": "markdown",
317
+ "source": [
318
+ "## Integrate Prompts from LO project"
319
+ ],
320
+ "metadata": {
321
+ "id": "MJCHl1T2TPWC"
322
+ }
323
+ },
324
+ {
325
+ "cell_type": "markdown",
326
+ "source": [
327
+ "### Creating a Chain for Short Answer Generation"
328
+ ],
329
+ "metadata": {
330
+ "id": "IPTyUOl-WdiL"
331
+ }
332
+ },
333
+ {
334
+ "cell_type": "markdown",
335
+ "source": [
336
+ "In this example, the context would include the poem \"The Road Not Taken\" by Robert Frost"
337
+ ],
338
+ "metadata": {
339
+ "id": "203qBjZvmFK1"
340
+ }
341
+ },
342
+ {
343
+ "cell_type": "code",
344
+ "source": [
345
+ "# This is what I used to test the function 'generate_questions_v2'\n",
346
+ "template_string = \"\"\"\n",
347
+ "You are a world-class tutor helping students to perform better on oral and written exams though interactive experiences.\"\n",
348
+ "\n",
349
+ "The following text should be used as the basis for the instructions which follow: {context} \\\n",
350
+ "\n",
351
+ "The following is the guideline for generating the questiion: {pre_prompt}\n",
352
+ "\"\"\""
353
+ ],
354
+ "metadata": {
355
+ "id": "w1AjHwIoVnvw"
356
+ },
357
+ "execution_count": 5,
358
+ "outputs": []
359
+ },
360
+ {
361
+ "cell_type": "code",
362
+ "source": [
363
+ "prompt_template = ChatPromptTemplate.from_template(template_string)\n",
364
+ "prompt_template.messages[0].prompt.input_variables"
365
+ ],
366
+ "metadata": {
367
+ "colab": {
368
+ "base_uri": "https://localhost:8080/"
369
+ },
370
+ "id": "39-lm5I-Wlep",
371
+ "outputId": "44b39930-5258-484b-8c7d-c36ff4b5dc1a"
372
+ },
373
+ "execution_count": 6,
374
+ "outputs": [
375
+ {
376
+ "output_type": "execute_result",
377
+ "data": {
378
+ "text/plain": [
379
+ "['context', 'pre_prompt']"
380
+ ]
381
+ },
382
+ "metadata": {},
383
+ "execution_count": 6
384
+ }
385
+ ]
386
+ },
387
+ {
388
+ "cell_type": "markdown",
389
+ "source": [
390
+ "### Creating a Chain for AI Evaluation"
391
+ ],
392
+ "metadata": {
393
+ "id": "-Pfxkcdxh9nZ"
394
+ }
395
+ },
396
+ {
397
+ "cell_type": "code",
398
+ "source": [
399
+ "template_evaluation = \"\"\"\n",
400
+ "Given the follwing {context} and the {transcript}, evaluate whether or not the student answered correctly on the {question}.\n",
401
+ "\"\"\""
402
+ ],
403
+ "metadata": {
404
+ "id": "u6pH1x-gWnFF"
405
+ },
406
+ "execution_count": 7,
407
+ "outputs": []
408
+ },
409
+ {
410
+ "cell_type": "code",
411
+ "source": [
412
+ "# @title\n",
413
+ "prompt_template2 = ChatPromptTemplate.from_template(template_evaluation)\n",
414
+ "prompt_template2.messages[0].prompt.input_variables"
415
+ ],
416
+ "metadata": {
417
+ "colab": {
418
+ "base_uri": "https://localhost:8080/"
419
+ },
420
+ "id": "YPO_IE5ThC6W",
421
+ "outputId": "5361929c-cf8c-483d-901a-ed14a0db89fa"
422
+ },
423
+ "execution_count": 8,
424
+ "outputs": [
425
+ {
426
+ "output_type": "execute_result",
427
+ "data": {
428
+ "text/plain": [
429
+ "['context', 'question', 'transcript']"
430
+ ]
431
+ },
432
+ "metadata": {},
433
+ "execution_count": 8
434
+ }
435
+ ]
436
+ },
437
+ {
438
+ "cell_type": "markdown",
439
+ "source": [
440
+ "## UI Design\n",
441
+ "\n",
442
+ "https://colab.research.google.com/github/petewarden/openai-whisper-webapp/blob/main/OpenAI_Whisper_ASR_Demo.ipynb"
443
+ ],
444
+ "metadata": {
445
+ "id": "M6IzVTjz5cex"
446
+ }
447
+ },
448
+ {
449
+ "cell_type": "markdown",
450
+ "source": [
451
+ "### Functions"
452
+ ],
453
+ "metadata": {
454
+ "id": "l4o8R5eUE1n8"
455
+ }
456
+ },
457
+ {
458
+ "cell_type": "code",
459
+ "source": [
460
+ "def embed_key(openai_api_key):\n",
461
+ " os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n",
462
+ "\n",
463
+ "def transcribe(audio_file_path):\n",
464
+ " with open(audio_file_path, \"rb\") as audio_file:\n",
465
+ " # Call OpenAI's Whisper model for transcription\n",
466
+ " transcript = openai.Audio.transcribe(\"whisper-1\", audio_file)\n",
467
+ " transcribed_text = transcript[\"text\"]\n",
468
+ " return transcribed_text\n",
469
+ "\n",
470
+ "def translate(text):\n",
471
+ " # Create a prompt template (This will be changed later to fit the actual task)\n",
472
+ " # Here translation will be a filler task of GPT\n",
473
+ " test_input1 = prompt_template.format_messages(\n",
474
+ " expertise='Language Translation',\n",
475
+ " language='Japanese',\n",
476
+ " style='romantic',\n",
477
+ " transcribed_text=text)\n",
478
+ "\n",
479
+ " response = chat.predict_messages(test_input1)\n",
480
+ " return response.content\n",
481
+ "\n",
482
+ "def process_file(files):\n",
483
+ " for file in files:\n",
484
+ " try:\n",
485
+ " extension = file.name.split('.')[-1].lower()\n",
486
+ " if extension == 'docx':\n",
487
+ " doc = Document(file.name)\n",
488
+ " full_text = []\n",
489
+ " for paragraph in doc.paragraphs:\n",
490
+ " full_text.append(paragraph.text)\n",
491
+ " return '\\n'.join(full_text)\n",
492
+ "\n",
493
+ " elif extension == 'pdf':\n",
494
+ " pdf_file = open(file.name, 'rb')\n",
495
+ " reader = PyPDF2.PdfReader(pdf_file)\n",
496
+ " num_pages = len(reader.pages)\n",
497
+ " full_text = []\n",
498
+ " for page in range(num_pages):\n",
499
+ " page_obj = reader.pages[page]\n",
500
+ " full_text.append(page_obj.extract_text())\n",
501
+ " pdf_file.close()\n",
502
+ " return '\\n'.join(full_text)\n",
503
+ "\n",
504
+ " elif extension == 'txt':\n",
505
+ " with open(file.name, 'r') as txt_file:\n",
506
+ " full_text = txt_file.read()\n",
507
+ " return full_text\n",
508
+ "\n",
509
+ " else:\n",
510
+ " return \"Unsupported file type\"\n",
511
+ " except FileNotFoundError:\n",
512
+ " return \"File not found\"\n",
513
+ " except PermissionError:\n",
514
+ " return \"Permission denied\"\n",
515
+ "\n",
516
+ "def generate_questions(context, transcript):\n",
517
+ " text = process_file(context)\n",
518
+ " test_input1 = prompt_template1.format_messages(\n",
519
+ " context = text,\n",
520
+ " transcribed_text = transcript)\n",
521
+ "\n",
522
+ " response = chat(test_input1)\n",
523
+ " return response.content\n",
524
+ "\n",
525
+ "def generate_questions_v2(text, prompt):\n",
526
+ " #text = process_file(file)\n",
527
+ " test_input1 = prompt_template.format_messages(\n",
528
+ " context = text,\n",
529
+ " pre_prompt = prompt)\n",
530
+ "\n",
531
+ " response = chat(test_input1)\n",
532
+ " return response\n",
533
+ "\n",
534
+ "# def ai_evaluate(context, audio_main, audio_qa, questions):\n",
535
+ "# test_input1 = prompt_template2.format_messages(\n",
536
+ "# context = context,\n",
537
+ "# transcribed_text = audio_main,\n",
538
+ "# transcribed_qa = audio_qa,\n",
539
+ "# questions = questions)\n",
540
+ "\n",
541
+ "# response = chat(test_input1)\n",
542
+ "# return response.content\n",
543
+ "\n",
544
+ "def ai_evaluate_v2(text, audio_main, questions):\n",
545
+ " #audio = transcribe(audio_main)\n",
546
+ " test_input1 = prompt_template2.format_messages(\n",
547
+ " context = text,\n",
548
+ " transcript = audio_main,\n",
549
+ " question = questions\n",
550
+ " )\n",
551
+ "\n",
552
+ " response = chat(test_input1)\n",
553
+ " return response.content\n",
554
+ "\n",
555
+ "def upload_file(files):\n",
556
+ " file_paths = [file.name for file in files]\n",
557
+ " return file_paths"
558
+ ],
559
+ "metadata": {
560
+ "id": "ABN0X9xQHeii"
561
+ },
562
+ "execution_count": 12,
563
+ "outputs": []
564
+ },
565
+ {
566
+ "cell_type": "markdown",
567
+ "source": [
568
+ "### Test process_file"
569
+ ],
570
+ "metadata": {
571
+ "id": "a3WUL_hFyMkr"
572
+ }
573
+ },
574
+ {
575
+ "cell_type": "code",
576
+ "source": [
577
+ "from google.colab import files\n",
578
+ "def upload_syllabi():\n",
579
+ " uploaded = files.upload()\n",
580
+ " for name, data in uploaded.items():\n",
581
+ " with open(name, 'wb') as f:\n",
582
+ " f.write(data)\n",
583
+ " print('saved file', name)\n",
584
+ "upload_syllabi()"
585
+ ],
586
+ "metadata": {
587
+ "colab": {
588
+ "base_uri": "https://localhost:8080/",
589
+ "height": 90
590
+ },
591
+ "id": "nih4FXX0Pl9U",
592
+ "outputId": "ce48c70a-d52c-4267-f3fc-8b22404448d7"
593
+ },
594
+ "execution_count": 13,
595
+ "outputs": [
596
+ {
597
+ "output_type": "display_data",
598
+ "data": {
599
+ "text/plain": [
600
+ "<IPython.core.display.HTML object>"
601
+ ],
602
+ "text/html": [
603
+ "\n",
604
+ " <input type=\"file\" id=\"files-c72e8e8a-ac6f-48ab-9fb4-a84b1483268a\" name=\"files[]\" multiple disabled\n",
605
+ " style=\"border:none\" />\n",
606
+ " <output id=\"result-c72e8e8a-ac6f-48ab-9fb4-a84b1483268a\">\n",
607
+ " Upload widget is only available when the cell has been executed in the\n",
608
+ " current browser session. Please rerun this cell to enable.\n",
609
+ " </output>\n",
610
+ " <script>// Copyright 2017 Google LLC\n",
611
+ "//\n",
612
+ "// Licensed under the Apache License, Version 2.0 (the \"License\");\n",
613
+ "// you may not use this file except in compliance with the License.\n",
614
+ "// You may obtain a copy of the License at\n",
615
+ "//\n",
616
+ "// http://www.apache.org/licenses/LICENSE-2.0\n",
617
+ "//\n",
618
+ "// Unless required by applicable law or agreed to in writing, software\n",
619
+ "// distributed under the License is distributed on an \"AS IS\" BASIS,\n",
620
+ "// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
621
+ "// See the License for the specific language governing permissions and\n",
622
+ "// limitations under the License.\n",
623
+ "\n",
624
+ "/**\n",
625
+ " * @fileoverview Helpers for google.colab Python module.\n",
626
+ " */\n",
627
+ "(function(scope) {\n",
628
+ "function span(text, styleAttributes = {}) {\n",
629
+ " const element = document.createElement('span');\n",
630
+ " element.textContent = text;\n",
631
+ " for (const key of Object.keys(styleAttributes)) {\n",
632
+ " element.style[key] = styleAttributes[key];\n",
633
+ " }\n",
634
+ " return element;\n",
635
+ "}\n",
636
+ "\n",
637
+ "// Max number of bytes which will be uploaded at a time.\n",
638
+ "const MAX_PAYLOAD_SIZE = 100 * 1024;\n",
639
+ "\n",
640
+ "function _uploadFiles(inputId, outputId) {\n",
641
+ " const steps = uploadFilesStep(inputId, outputId);\n",
642
+ " const outputElement = document.getElementById(outputId);\n",
643
+ " // Cache steps on the outputElement to make it available for the next call\n",
644
+ " // to uploadFilesContinue from Python.\n",
645
+ " outputElement.steps = steps;\n",
646
+ "\n",
647
+ " return _uploadFilesContinue(outputId);\n",
648
+ "}\n",
649
+ "\n",
650
+ "// This is roughly an async generator (not supported in the browser yet),\n",
651
+ "// where there are multiple asynchronous steps and the Python side is going\n",
652
+ "// to poll for completion of each step.\n",
653
+ "// This uses a Promise to block the python side on completion of each step,\n",
654
+ "// then passes the result of the previous step as the input to the next step.\n",
655
+ "function _uploadFilesContinue(outputId) {\n",
656
+ " const outputElement = document.getElementById(outputId);\n",
657
+ " const steps = outputElement.steps;\n",
658
+ "\n",
659
+ " const next = steps.next(outputElement.lastPromiseValue);\n",
660
+ " return Promise.resolve(next.value.promise).then((value) => {\n",
661
+ " // Cache the last promise value to make it available to the next\n",
662
+ " // step of the generator.\n",
663
+ " outputElement.lastPromiseValue = value;\n",
664
+ " return next.value.response;\n",
665
+ " });\n",
666
+ "}\n",
667
+ "\n",
668
+ "/**\n",
669
+ " * Generator function which is called between each async step of the upload\n",
670
+ " * process.\n",
671
+ " * @param {string} inputId Element ID of the input file picker element.\n",
672
+ " * @param {string} outputId Element ID of the output display.\n",
673
+ " * @return {!Iterable<!Object>} Iterable of next steps.\n",
674
+ " */\n",
675
+ "function* uploadFilesStep(inputId, outputId) {\n",
676
+ " const inputElement = document.getElementById(inputId);\n",
677
+ " inputElement.disabled = false;\n",
678
+ "\n",
679
+ " const outputElement = document.getElementById(outputId);\n",
680
+ " outputElement.innerHTML = '';\n",
681
+ "\n",
682
+ " const pickedPromise = new Promise((resolve) => {\n",
683
+ " inputElement.addEventListener('change', (e) => {\n",
684
+ " resolve(e.target.files);\n",
685
+ " });\n",
686
+ " });\n",
687
+ "\n",
688
+ " const cancel = document.createElement('button');\n",
689
+ " inputElement.parentElement.appendChild(cancel);\n",
690
+ " cancel.textContent = 'Cancel upload';\n",
691
+ " const cancelPromise = new Promise((resolve) => {\n",
692
+ " cancel.onclick = () => {\n",
693
+ " resolve(null);\n",
694
+ " };\n",
695
+ " });\n",
696
+ "\n",
697
+ " // Wait for the user to pick the files.\n",
698
+ " const files = yield {\n",
699
+ " promise: Promise.race([pickedPromise, cancelPromise]),\n",
700
+ " response: {\n",
701
+ " action: 'starting',\n",
702
+ " }\n",
703
+ " };\n",
704
+ "\n",
705
+ " cancel.remove();\n",
706
+ "\n",
707
+ " // Disable the input element since further picks are not allowed.\n",
708
+ " inputElement.disabled = true;\n",
709
+ "\n",
710
+ " if (!files) {\n",
711
+ " return {\n",
712
+ " response: {\n",
713
+ " action: 'complete',\n",
714
+ " }\n",
715
+ " };\n",
716
+ " }\n",
717
+ "\n",
718
+ " for (const file of files) {\n",
719
+ " const li = document.createElement('li');\n",
720
+ " li.append(span(file.name, {fontWeight: 'bold'}));\n",
721
+ " li.append(span(\n",
722
+ " `(${file.type || 'n/a'}) - ${file.size} bytes, ` +\n",
723
+ " `last modified: ${\n",
724
+ " file.lastModifiedDate ? file.lastModifiedDate.toLocaleDateString() :\n",
725
+ " 'n/a'} - `));\n",
726
+ " const percent = span('0% done');\n",
727
+ " li.appendChild(percent);\n",
728
+ "\n",
729
+ " outputElement.appendChild(li);\n",
730
+ "\n",
731
+ " const fileDataPromise = new Promise((resolve) => {\n",
732
+ " const reader = new FileReader();\n",
733
+ " reader.onload = (e) => {\n",
734
+ " resolve(e.target.result);\n",
735
+ " };\n",
736
+ " reader.readAsArrayBuffer(file);\n",
737
+ " });\n",
738
+ " // Wait for the data to be ready.\n",
739
+ " let fileData = yield {\n",
740
+ " promise: fileDataPromise,\n",
741
+ " response: {\n",
742
+ " action: 'continue',\n",
743
+ " }\n",
744
+ " };\n",
745
+ "\n",
746
+ " // Use a chunked sending to avoid message size limits. See b/62115660.\n",
747
+ " let position = 0;\n",
748
+ " do {\n",
749
+ " const length = Math.min(fileData.byteLength - position, MAX_PAYLOAD_SIZE);\n",
750
+ " const chunk = new Uint8Array(fileData, position, length);\n",
751
+ " position += length;\n",
752
+ "\n",
753
+ " const base64 = btoa(String.fromCharCode.apply(null, chunk));\n",
754
+ " yield {\n",
755
+ " response: {\n",
756
+ " action: 'append',\n",
757
+ " file: file.name,\n",
758
+ " data: base64,\n",
759
+ " },\n",
760
+ " };\n",
761
+ "\n",
762
+ " let percentDone = fileData.byteLength === 0 ?\n",
763
+ " 100 :\n",
764
+ " Math.round((position / fileData.byteLength) * 100);\n",
765
+ " percent.textContent = `${percentDone}% done`;\n",
766
+ "\n",
767
+ " } while (position < fileData.byteLength);\n",
768
+ " }\n",
769
+ "\n",
770
+ " // All done.\n",
771
+ " yield {\n",
772
+ " response: {\n",
773
+ " action: 'complete',\n",
774
+ " }\n",
775
+ " };\n",
776
+ "}\n",
777
+ "\n",
778
+ "scope.google = scope.google || {};\n",
779
+ "scope.google.colab = scope.google.colab || {};\n",
780
+ "scope.google.colab._files = {\n",
781
+ " _uploadFiles,\n",
782
+ " _uploadFilesContinue,\n",
783
+ "};\n",
784
+ "})(self);\n",
785
+ "</script> "
786
+ ]
787
+ },
788
+ "metadata": {}
789
+ },
790
+ {
791
+ "output_type": "stream",
792
+ "name": "stdout",
793
+ "text": [
794
+ "Saving instructor_note_2.docx to instructor_note_2.docx\n",
795
+ "saved file instructor_note_2.docx\n"
796
+ ]
797
+ }
798
+ ]
799
+ },
800
+ {
801
+ "cell_type": "code",
802
+ "source": [
803
+ "# Might need some way to make pdf file to load more readable\n",
804
+ "# process_file('/content/instrutor_note.docx')\n",
805
+ "# process_file('/content/Big Data & Economics.pdf')\n",
806
+ "# process_file('/content/Big Data & Economics.pdf')"
807
+ ],
808
+ "metadata": {
809
+ "id": "LJX1AKTMyVm8"
810
+ },
811
+ "execution_count": null,
812
+ "outputs": []
813
+ },
814
+ {
815
+ "cell_type": "markdown",
816
+ "source": [
817
+ "### Gradio Interface V1"
818
+ ],
819
+ "metadata": {
820
+ "id": "c4s5o8baE6wN"
821
+ }
822
+ },
823
+ {
824
+ "cell_type": "code",
825
+ "source": [
826
+ "# @title\n",
827
+ "# with gr.Blocks() as demo:\n",
828
+ "# gr.Markdown(\"# Oral Exam App\")\n",
829
+ "# with gr.Box():\n",
830
+ "# gr.HTML(\"\"\"Embed your OpenAI API key below; if you haven't created one already, visit\n",
831
+ "# platform.openai.com/account/api-keys\n",
832
+ "# to sign up for an account and get your personal API key\"\"\",\n",
833
+ "# elem_classes=\"textbox_label\")\n",
834
+ "# input = gr.Textbox(show_label=False, type=\"password\", container=False,\n",
835
+ "# placeholder=\"●●●●●●●●●●●●●●●●●\")\n",
836
+ "# input.change(fn=embed_key, inputs=input, outputs=None)\n",
837
+ "\n",
838
+ "# with gr.Blocks():\n",
839
+ "# gr.Markdown(\"## Upload your audio file or start recording\")\n",
840
+ "\n",
841
+ "# with gr.Row():\n",
842
+ "\n",
843
+ "\n",
844
+ "# with gr.Column():\n",
845
+ "# file_input = gr.Files(label=\"Load a mp3 file\",\n",
846
+ "# file_types=['.mp3'], type=\"file\",\n",
847
+ "# elem_classes=\"short-height\")\n",
848
+ "# record_inputs = gr.Audio(source=\"microphone\", type=\"filepath\")\n",
849
+ "\n",
850
+ "# with gr.Column():\n",
851
+ "# outputs_transcribe=gr.Textbox(label=\"Transcription\")\n",
852
+ "\n",
853
+ "# with gr.Row():\n",
854
+ "# btn1 = gr.Button(value=\"Transcribe recorded audio\")\n",
855
+ "# btn1.click(transcribe, inputs=record_inputs, outputs=outputs_transcribe)\n",
856
+ "# btn2 = gr.Button(value=\"Transcribe uploaded audio\")\n",
857
+ "# btn2.click(transcribe, inputs=file_input, outputs=outputs_transcribe)\n",
858
+ "\n",
859
+ "# outputs_translate=gr.Textbox(label=\"Translation\")\n",
860
+ "# btn3 = gr.Button(value=\"Translate\")\n",
861
+ "# btn3.click(translate, inputs=outputs_transcribe, outputs=outputs_translate)\n",
862
+ "\n",
863
+ "# demo.launch()\n"
864
+ ],
865
+ "metadata": {
866
+ "id": "ZkfJXCGDFhdw",
867
+ "cellView": "form"
868
+ },
869
+ "execution_count": null,
870
+ "outputs": []
871
+ },
872
+ {
873
+ "cell_type": "markdown",
874
+ "source": [
875
+ "### baseline functionality V1"
876
+ ],
877
+ "metadata": {
878
+ "id": "AnkuosJ7Vw4z"
879
+ }
880
+ },
881
+ {
882
+ "cell_type": "code",
883
+ "source": [
884
+ "# @title\n",
885
+ "with gr.Blocks() as demo:\n",
886
+ " gr.Markdown(\"# Oral Exam App\")\n",
887
+ " gr.Markdown(\"## OpenAI API key\")\n",
888
+ " with gr.Box():\n",
889
+ " gr.HTML(\"\"\"Embed your OpenAI API key below; if you haven't created one already, visit\n",
890
+ " platform.openai.com/account/api-keys\n",
891
+ " to sign up for an account and get your personal API key\"\"\",\n",
892
+ " elem_classes=\"textbox_label\")\n",
893
+ " input = gr.Textbox(show_label=False, type=\"password\", container=False,\n",
894
+ " placeholder=\"●●●●●●●●●●●●●●●●●\")\n",
895
+ " input.change(fn=embed_key, inputs=input, outputs=None)\n",
896
+ "\n",
897
+ " with gr.Blocks():\n",
898
+ " #########################\n",
899
+ " #########Context#########\n",
900
+ " #########################\n",
901
+ " with gr.Accordion(\"Context section\"):\n",
902
+ " ### Should also allow vector stores\n",
903
+ " gr.Markdown(\"## Please upload the context document(s) for Oral exam\")\n",
904
+ " context_input = gr.File(label=\"Click to upload context file\",\n",
905
+ " file_count=\"multiple\",\n",
906
+ " file_types=[\".txt\", \".docx\", \".pdf\"])\n",
907
+ " outputs_context=gr.Textbox(label=\"Context\")\n",
908
+ " context_input.change(fn=process_file, inputs=context_input, outputs=outputs_context)\n",
909
+ " # upload_button = gr.Button(value=\"Show context\")\n",
910
+ " # upload_button.click(process_file, context_input, outputs_context)\n",
911
+ "\n",
912
+ " #########################\n",
913
+ " #######Main Audio########\n",
914
+ " #########################\n",
915
+ " with gr.Accordion(\"Main audio section\"):\n",
916
+ " gr.Markdown(\"## Upload your audio file or start recording\")\n",
917
+ " with gr.Column():\n",
918
+ " ## uploading files seem not working (don't know why)\n",
919
+ " with gr.Row():\n",
920
+ " file_input = gr.Audio(label=\"Upload Audio\", source=\"upload\", type=\"filepath\")\n",
921
+ " record_inputs = gr.Audio(label=\"Record Audio\", source=\"microphone\", type=\"filepath\")\n",
922
+ "\n",
923
+ " gr.Markdown(\"## Transcribe the audio uploaded or recorded\")\n",
924
+ " outputs_transcribe=gr.Textbox(label=\"Transcription\")\n",
925
+ "\n",
926
+ " file_input.change(fn=transcribe, inputs=file_input, outputs=outputs_transcribe)\n",
927
+ " record_inputs.change(fn=transcribe, inputs=record_inputs, outputs=outputs_transcribe)\n",
928
+ "\n",
929
+ " #########################\n",
930
+ " ###Question Generation###\n",
931
+ " #########################\n",
932
+ " with gr.Accordion(\"Question section\"):\n",
933
+ " gr.Markdown(\"## Questions\")\n",
934
+ " with gr.Row():\n",
935
+ " with gr.Column():\n",
936
+ " outputs_qa=gr.Textbox(label=\"Generate questions\")\n",
937
+ " btn3 = gr.Button(value=\"Generate questions\")\n",
938
+ " btn3.click(generate_questions, inputs=[context_input, outputs_transcribe], outputs=outputs_qa)\n",
939
+ "\n",
940
+ " ######################### Need additional work to include these questions when click button #########################\n",
941
+ " with gr.Column():\n",
942
+ " submit_question=gr.Textbox(label=\"Use existing questions\")\n",
943
+ " btn4 = gr.Button(value=\"Use these questions\")\n",
944
+ " # btn4.click(use_this_question, inputs=outputs_transcribe, outputs=None)\n",
945
+ "\n",
946
+ " #########################\n",
947
+ " #########Audio QA########\n",
948
+ " #########################\n",
949
+ " with gr.Accordion(\"Audio QA section\"):\n",
950
+ " gr.Markdown(\"## Question answering\")\n",
951
+ " ##### This may be iterative\n",
952
+ " with gr.Row():\n",
953
+ " file_input2 = gr.Audio(label=\"Upload Audio\", source=\"upload\", type=\"filepath\")\n",
954
+ " record_inputs2 = gr.Audio(label=\"Record Audio\", source=\"microphone\", type=\"filepath\")\n",
955
+ "\n",
956
+ " gr.Markdown(\"## Transcribe the audio uploaded or recorded\")\n",
957
+ " outputs_transcribe2=gr.Textbox(label=\"Transcription\")\n",
958
+ " file_input2.change(fn=transcribe, inputs=file_input2, outputs=outputs_transcribe2)\n",
959
+ " record_inputs2.change(fn=transcribe, inputs=record_inputs2, outputs=outputs_transcribe2)\n",
960
+ "\n",
961
+ " #########################\n",
962
+ " #######Evaluation########\n",
963
+ " #########################\n",
964
+ " with gr.Accordion(\"Evaluation section\"):\n",
965
+ " gr.Markdown(\"## Evaluation\")\n",
966
+ " with gr.Tab(\"General evalution\"):\n",
967
+ " evalution=gr.Textbox(label=\"AI Evaluation\")\n",
968
+ " btn5 = gr.Button(value=\"Evaluate\")\n",
969
+ " btn5.click(ai_evaluate, inputs=[context_input, record_inputs,record_inputs2, outputs_qa], outputs=evalution)\n",
970
+ " with gr.Tab(\"Quantitative evalution\"):\n",
971
+ " table_output = gr.Dataframe(label = \"Some kind of evaluation metrics?\")\n",
972
+ " btn6 = gr.Button(value=\"Evaluate\")\n",
973
+ " btn6.click(ai_evaluate, inputs=[context_input, record_inputs,record_inputs2, outputs_qa], outputs=table_output)\n",
974
+ "\n",
975
+ " demo.launch()\n",
976
+ " # demo.launch(share=True)\n",
977
+ " # demo.launch(debug=True)"
978
+ ],
979
+ "metadata": {
980
+ "colab": {
981
+ "base_uri": "https://localhost:8080/",
982
+ "height": 616
983
+ },
984
+ "id": "EAPljDMYVy3u",
985
+ "outputId": "1f347376-14e8-48ea-e531-295a4fefd6cd",
986
+ "cellView": "form"
987
+ },
988
+ "execution_count": null,
989
+ "outputs": [
990
+ {
991
+ "output_type": "stream",
992
+ "name": "stdout",
993
+ "text": [
994
+ "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n",
995
+ "Note: opening Chrome Inspector may crash demo inside Colab notebooks.\n",
996
+ "\n",
997
+ "To create a public link, set `share=True` in `launch()`.\n"
998
+ ]
999
+ },
1000
+ {
1001
+ "output_type": "display_data",
1002
+ "data": {
1003
+ "text/plain": [
1004
+ "<IPython.core.display.Javascript object>"
1005
+ ],
1006
+ "application/javascript": [
1007
+ "(async (port, path, width, height, cache, element) => {\n",
1008
+ " if (!google.colab.kernel.accessAllowed && !cache) {\n",
1009
+ " return;\n",
1010
+ " }\n",
1011
+ " element.appendChild(document.createTextNode(''));\n",
1012
+ " const url = await google.colab.kernel.proxyPort(port, {cache});\n",
1013
+ "\n",
1014
+ " const external_link = document.createElement('div');\n",
1015
+ " external_link.innerHTML = `\n",
1016
+ " <div style=\"font-family: monospace; margin-bottom: 0.5rem\">\n",
1017
+ " Running on <a href=${new URL(path, url).toString()} target=\"_blank\">\n",
1018
+ " https://localhost:${port}${path}\n",
1019
+ " </a>\n",
1020
+ " </div>\n",
1021
+ " `;\n",
1022
+ " element.appendChild(external_link);\n",
1023
+ "\n",
1024
+ " const iframe = document.createElement('iframe');\n",
1025
+ " iframe.src = new URL(path, url).toString();\n",
1026
+ " iframe.height = height;\n",
1027
+ " iframe.allow = \"autoplay; camera; microphone; clipboard-read; clipboard-write;\"\n",
1028
+ " iframe.width = width;\n",
1029
+ " iframe.style.border = 0;\n",
1030
+ " element.appendChild(iframe);\n",
1031
+ " })(7862, \"/\", \"100%\", 500, false, window.element)"
1032
+ ]
1033
+ },
1034
+ "metadata": {}
1035
+ }
1036
+ ]
1037
+ },
1038
+ {
1039
+ "cell_type": "markdown",
1040
+ "source": [
1041
+ "### Baseline Functionality V2"
1042
+ ],
1043
+ "metadata": {
1044
+ "id": "YAKl-5P4dHEF"
1045
+ }
1046
+ },
1047
+ {
1048
+ "cell_type": "code",
1049
+ "source": [
1050
+ "# @title\n",
1051
+ "with gr.Blocks() as demo:\n",
1052
+ " gr.Markdown(\"# Oral Exam App\")\n",
1053
+ " gr.Markdown(\"## OpenAI API key\")\n",
1054
+ " with gr.Box():\n",
1055
+ " gr.HTML(\"\"\"Embed your OpenAI API key below; if you haven't created one already, visit\n",
1056
+ " platform.openai.com/account/api-keys\n",
1057
+ " to sign up for an account and get your personal API key\"\"\",\n",
1058
+ " elem_classes=\"textbox_label\")\n",
1059
+ " input = gr.Textbox(show_label=False, type=\"password\", container=False,\n",
1060
+ " placeholder=\"●●●●●●●●●●●●●●●●●\")\n",
1061
+ " input.change(fn=embed_key, inputs=input, outputs=None)\n",
1062
+ "\n",
1063
+ " with gr.Blocks():\n",
1064
+ " #########################\n",
1065
+ " #########Context#########\n",
1066
+ " #########################\n",
1067
+ " with gr.Accordion(\"Context section\"):\n",
1068
+ " ### Should also allow vector stores\n",
1069
+ " gr.Markdown(\"## Please upload the context document(s) for Oral exam\")\n",
1070
+ " context_input = gr.File(label=\"Click to upload context file\",\n",
1071
+ " file_count=\"multiple\",\n",
1072
+ " file_types=[\".txt\", \".docx\", \".pdf\"])\n",
1073
+ " outputs_context=gr.Textbox(label=\"Context\")\n",
1074
+ " context_input.change(fn=process_file, inputs=context_input, outputs=outputs_context)\n",
1075
+ " # upload_button = gr.Button(value=\"Show context\")\n",
1076
+ " # upload_button.click(process_file, context_input, outputs_context)\n",
1077
+ "\n",
1078
+ " #########################\n",
1079
+ " ###Question Generation###\n",
1080
+ " #########################\n",
1081
+ " with gr.Accordion(\"Question section\"):\n",
1082
+ " gr.Markdown(\"## Questions\")\n",
1083
+ " with gr.Row():\n",
1084
+ " with gr.Column():\n",
1085
+ " outputs_qa=gr.Textbox(label=\"Generate questions\")\n",
1086
+ " btn1 = gr.Button(value=\"Generate questions\")\n",
1087
+ " btn1.click(generate_questions_v2, inputs=outputs_context, outputs=outputs_qa)\n",
1088
+ "\n",
1089
+ " ######################### Need additional work to include these questions when click button #########################\n",
1090
+ " with gr.Column():\n",
1091
+ " submit_question=gr.Textbox(label=\"Use existing questions\")\n",
1092
+ " btn4 = gr.Button(value=\"Use these questions\")\n",
1093
+ " # btn4.click(use_this_question, inputs=outputs_transcribe, outputs=None)\n",
1094
+ "\n",
1095
+ " #########################\n",
1096
+ " #######Main Audio########\n",
1097
+ " #########################\n",
1098
+ " with gr.Accordion(\"Main audio section\"):\n",
1099
+ " gr.Markdown(\"## Upload your audio file or start recording\")\n",
1100
+ " with gr.Column():\n",
1101
+ " ## uploading files seem not working (don't know why)\n",
1102
+ " with gr.Row():\n",
1103
+ " file_input = gr.Audio(label=\"Upload Audio\", source=\"upload\", type=\"filepath\")\n",
1104
+ " record_inputs = gr.Audio(label=\"Record Audio\", source=\"microphone\", type=\"filepath\")\n",
1105
+ "\n",
1106
+ " gr.Markdown(\"## Transcribe the audio uploaded or recorded\")\n",
1107
+ " outputs_transcribe=gr.Textbox(label=\"Transcription\")\n",
1108
+ "\n",
1109
+ " file_input.change(fn=transcribe, inputs=file_input, outputs=outputs_transcribe)\n",
1110
+ " record_inputs.change(fn=transcribe, inputs=record_inputs, outputs=outputs_transcribe)\n",
1111
+ "\n",
1112
+ " #########################\n",
1113
+ " #######Evaluation########\n",
1114
+ " #########################\n",
1115
+ " with gr.Accordion(\"Evaluation section\"):\n",
1116
+ " gr.Markdown(\"## Evaluation\")\n",
1117
+ " with gr.Tab(\"General evalution\"):\n",
1118
+ " evalution=gr.Textbox(label=\"AI Evaluation\")\n",
1119
+ " btn5 = gr.Button(value=\"Evaluate\")\n",
1120
+ " btn5.click(ai_evaluate_v2, inputs=[outputs_context, outputs_transcribe, outputs_qa], outputs=evalution)\n",
1121
+ " with gr.Tab(\"Quantitative evalution\"):\n",
1122
+ " table_output = gr.Dataframe(label = \"Some kind of evaluation metrics?\")\n",
1123
+ " btn6 = gr.Button(value=\"Evaluate\")\n",
1124
+ " btn6.click(ai_evaluate_v2, inputs=[outputs_context, outputs_transcribe, outputs_qa], outputs=table_output)\n",
1125
+ "\n",
1126
+ " demo.launch()"
1127
+ ],
1128
+ "metadata": {
1129
+ "colab": {
1130
+ "base_uri": "https://localhost:8080/",
1131
+ "height": 706
1132
+ },
1133
+ "id": "04KxUQgUcTrm",
1134
+ "outputId": "66a9f8c8-36fe-4792-b7d6-3befa6f09269",
1135
+ "cellView": "form",
1136
+ "collapsed": true
1137
+ },
1138
+ "execution_count": 23,
1139
+ "outputs": [
1140
+ {
1141
+ "output_type": "stream",
1142
+ "name": "stderr",
1143
+ "text": [
1144
+ "/usr/local/lib/python3.10/dist-packages/gradio/utils.py:833: UserWarning: Expected 2 arguments for function <function generate_questions_v2 at 0x7aa8748f9bd0>, received 1.\n",
1145
+ " warnings.warn(\n",
1146
+ "/usr/local/lib/python3.10/dist-packages/gradio/utils.py:837: UserWarning: Expected at least 2 arguments for function <function generate_questions_v2 at 0x7aa8748f9bd0>, received 1.\n",
1147
+ " warnings.warn(\n"
1148
+ ]
1149
+ },
1150
+ {
1151
+ "output_type": "stream",
1152
+ "name": "stdout",
1153
+ "text": [
1154
+ "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n",
1155
+ "Note: opening Chrome Inspector may crash demo inside Colab notebooks.\n",
1156
+ "\n",
1157
+ "To create a public link, set `share=True` in `launch()`.\n"
1158
+ ]
1159
+ },
1160
+ {
1161
+ "output_type": "display_data",
1162
+ "data": {
1163
+ "text/plain": [
1164
+ "<IPython.core.display.Javascript object>"
1165
+ ],
1166
+ "application/javascript": [
1167
+ "(async (port, path, width, height, cache, element) => {\n",
1168
+ " if (!google.colab.kernel.accessAllowed && !cache) {\n",
1169
+ " return;\n",
1170
+ " }\n",
1171
+ " element.appendChild(document.createTextNode(''));\n",
1172
+ " const url = await google.colab.kernel.proxyPort(port, {cache});\n",
1173
+ "\n",
1174
+ " const external_link = document.createElement('div');\n",
1175
+ " external_link.innerHTML = `\n",
1176
+ " <div style=\"font-family: monospace; margin-bottom: 0.5rem\">\n",
1177
+ " Running on <a href=${new URL(path, url).toString()} target=\"_blank\">\n",
1178
+ " https://localhost:${port}${path}\n",
1179
+ " </a>\n",
1180
+ " </div>\n",
1181
+ " `;\n",
1182
+ " element.appendChild(external_link);\n",
1183
+ "\n",
1184
+ " const iframe = document.createElement('iframe');\n",
1185
+ " iframe.src = new URL(path, url).toString();\n",
1186
+ " iframe.height = height;\n",
1187
+ " iframe.allow = \"autoplay; camera; microphone; clipboard-read; clipboard-write;\"\n",
1188
+ " iframe.width = width;\n",
1189
+ " iframe.style.border = 0;\n",
1190
+ " element.appendChild(iframe);\n",
1191
+ " })(7863, \"/\", \"100%\", 500, false, window.element)"
1192
+ ]
1193
+ },
1194
+ "metadata": {}
1195
+ }
1196
+ ]
1197
+ },
1198
+ {
1199
+ "cell_type": "code",
1200
+ "source": [
1201
+ "def prompt_select(selection, number, length):\n",
1202
+ " if selection == \"Random\":\n",
1203
+ " prompt = f\"Please design a {number} question quiz based on the context provided and the inputted learning objectives (if applicable). The types of questions should be randomized (including multiple choice, short answer, true/false, short answer, etc.). Provide one question at a time, and wait for my response before providing me with feedback. Again, while the quiz may ask for multiple questions, you should only provide 1 question in you initial response. Do not include the answer in your response. If I get an answer wrong, provide me with an explanation of why it was incorrect, and then give me additional chances to respond until I get the correct choice. Explain why the correct choice is right.\"\n",
1204
+ " elif selection == \"Fill in the Blank\":\n",
1205
+ " prompt = f\"Create a {number} question fill in the blank quiz refrencing the context provided. The quiz should reflect the learning objectives (if inputted). The 'blank' part of the question should appear as '________'. The answers should reflect what word(s) should go in the blank an accurate statement. An example is the follow: 'The author of the article is ______.' The question should be a statement. Provide one question at a time, and wait for my response before providing me with feedback. Again, while the quiz may ask for multiple questions, you should only provide ONE question in you initial response. Do not include the answer in your response. If I get an answer wrong, provide me with an explanation of why it was incorrect,and then give me additional chances to respond until I get the correct choice. Explain why the correct choice is right.\"\n",
1206
+ " elif selection == \"Short Answer\":\n",
1207
+ " prompt = f\"Please design a {number} question quiz about which reflects the learning objectives (if inputted). The questions should be short answer. Expect the correct answers to be {length} sentences long. Provide one question at a time, and wait for my response before providing me with feedback. Again, while the quiz may ask for multiple questions, you should only provide ONE question in you initial response. Do not include the answer in your response. If I get an answer wrong, provide me with an explanation of why it was incorrect, and then give me additional chances to respond until I get the correct choice. Explain why the correct answer is right.\"\n",
1208
+ " else:\n",
1209
+ " prompt = f\"Please design a {number} question {selection.lower()} quiz based on the context provided and the inputted learning objectives (if applicable). Provide one question at a time, and wait for my response before providing me with feedback. Again, while the quiz may ask for multiple questions, you should only provide 1 question in you initial response. Do not include the answer in your response. If I get an answer wrong, provide me with an explanation of why it was incorrect, and then give me additional chances to respond until I get the correct choice. Explain why the correct choice is right.\"\n",
1210
+ " return prompt\n",
1211
+ "\n",
1212
+ "\n",
1213
+ "# Function to save prompts (premade or custom) and return in the user input box in the chatbot`\n",
1214
+ "saved_text = \"\"\n",
1215
+ "def save_text(text):\n",
1216
+ " global saved_text\n",
1217
+ " saved_text = text\n",
1218
+ "\n",
1219
+ "def return_text():\n",
1220
+ " # Return the saved text\n",
1221
+ " return saved_text"
1222
+ ],
1223
+ "metadata": {
1224
+ "id": "wF80F1wU80rU"
1225
+ },
1226
+ "execution_count": 14,
1227
+ "outputs": []
1228
+ },
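For reference, `prompt_select` can be exercised directly; the argument values below are illustrative only, not part of the app flow.

```python
# Illustrative calls; the values are placeholders
print(prompt_select("Multiple Choice", 5, "1-2"))    # 5-question multiple choice prompt
print(prompt_select("Short Answer", 3, "3-4"))       # expects answers 3-4 sentences long
print(prompt_select("Fill in the Blank", 4, "1-2"))  # blanks rendered as '________'
```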
1229
+ {
1230
+ "cell_type": "markdown",
1231
+ "source": [
1232
+ "### Baseline Functionality V3"
1233
+ ],
1234
+ "metadata": {
1235
+ "id": "F5-Ja2evCE4X"
1236
+ }
1237
+ },
1238
+ {
1239
+ "cell_type": "markdown",
1240
+ "source": [
1241
+ "Updated Question Selection and Chatbot Feature"
1242
+ ],
1243
+ "metadata": {
1244
+ "id": "rr8YlzcJCKv4"
1245
+ }
1246
+ },
1247
+ {
1248
+ "cell_type": "code",
1249
+ "source": [
1250
+ "with gr.Blocks() as demo:\n",
1251
+ " gr.Markdown(\"# Oral Exam App\")\n",
1252
+ " gr.Markdown(\"## OpenAI API key\")\n",
1253
+ " with gr.Box():\n",
1254
+ " gr.HTML(\"\"\"Embed your OpenAI API key below; if you haven't created one already, visit\n",
1255
+ " platform.openai.com/account/api-keys\n",
1256
+ " to sign up for an account and get your personal API key\"\"\",\n",
1257
+ " elem_classes=\"textbox_label\")\n",
1258
+ " input = gr.Textbox(show_label=False, type=\"password\", container=False,\n",
1259
+ " placeholder=\"●●●●●●●●●●●●●●●●●\")\n",
1260
+ " input.change(fn=embed_key, inputs=input, outputs=None)\n",
1261
+ "\n",
1262
+ " with gr.Blocks():\n",
1263
+ " #########################\n",
1264
+ " #########Context#########\n",
1265
+ " #########################\n",
1266
+ " with gr.Accordion(\"Context section\"):\n",
1267
+ " ### Should also allow vector stores\n",
1268
+ " gr.Markdown(\"## Please upload the context document(s) for Oral exam\")\n",
1269
+ " context_input = gr.File(label=\"Click to upload context file\",\n",
1270
+ " file_count=\"multiple\",\n",
1271
+ " file_types=[\".txt\", \".docx\", \".pdf\"])\n",
1272
+ " outputs_context=gr.Textbox(label=\"Context\")\n",
1273
+ " context_input.change(fn=process_file, inputs=context_input, outputs=outputs_context)\n",
1274
+ " # upload_button = gr.Button(value=\"Show context\")\n",
1275
+ " # upload_button.click(process_file, context_input, outputs_context)\n",
1276
+ "\n",
1277
+ " with gr.Blocks():\n",
1278
+ " gr.Markdown(\"\"\"\n",
1279
+ " ## Generate a Premade Prompt\n",
1280
+ " Select your type and number of desired questions. Click \"Generate Prompt\" to get your premade prompt,\n",
1281
+ " and then \"Insert Prompt into Chat\" to copy the text into the chat interface below. \\\n",
1282
+ " You can also copy the prompt using the icon in the upper right corner and paste directly into the input box when interacting with the model.\n",
1283
+ " \"\"\")\n",
1284
+ " with gr.Row():\n",
1285
+ " with gr.Column():\n",
1286
+ " question_type = gr.Dropdown([\"Multiple Choice\", \"True or False\", \"Short Answer\", \"Fill in the Blank\", \"Random\"], label=\"Question Type\")\n",
1287
+ " number_of_questions = gr.Textbox(label=\"Enter desired number of questions\")\n",
1288
+ " sa_desired_length = gr.Dropdown([\"1-2\", \"3-4\", \"5-6\", \"6 or more\"], label = \"For short answer questions only, choose the desired sentence length for answers. The default value is 1-2 sentences.\")\n",
1289
+ " with gr.Column():\n",
1290
+ " prompt_button = gr.Button(\"Generate Prompt\")\n",
1291
+ " premade_prompt_output = gr.Textbox(label=\"Generated prompt (save or copy)\", show_copy_button=True)\n",
1292
+ " prompt_button.click(prompt_select,\n",
1293
+ " inputs=[question_type, number_of_questions, sa_desired_length],\n",
1294
+ " outputs=premade_prompt_output)\n",
1295
+ " ########################\n",
1296
+ " ##Question Generation###\n",
1297
+ " ########################\n",
1298
+ " with gr.Accordion(\"Question section\"):\n",
1299
+ " gr.Markdown(\"## Questions\")\n",
1300
+ " with gr.Row():\n",
1301
+ " with gr.Column():\n",
1302
+ " outputs_qa=gr.Textbox(label=\"Generate questions\")\n",
1303
+ " btn1 = gr.Button(value=\"Generate questions\")\n",
1304
+ " btn1.click(generate_questions_v2, inputs=[outputs_context, premade_prompt_output], outputs=outputs_qa)\n",
1305
+ "\n",
1306
+ " ######################### Need additional work to include these questions when click button #########################\n",
1307
+ " with gr.Column():\n",
1308
+ " submit_question=gr.Textbox(label=\"Use existing questions\")\n",
1309
+ " btn4 = gr.Button(value=\"Use these questions\")\n",
1310
+ " # btn4.click(use_this_question, inputs=outputs_transcribe, outputs=None)\n",
1311
+ "\n",
1312
+ " #########################\n",
1313
+ " #######Main Audio########\n",
1314
+ " #########################\n",
1315
+ " with gr.Accordion(\"Main audio section\"):\n",
1316
+ " gr.Markdown(\"## Upload your audio file or start recording\")\n",
1317
+ " with gr.Column():\n",
1318
+ " ## uploading files seem not working (don't know why)\n",
1319
+ " with gr.Row():\n",
1320
+ " file_input = gr.Audio(label=\"Upload Audio\", source=\"upload\", type=\"filepath\")\n",
1321
+ " record_inputs = gr.Audio(label=\"Record Audio\", source=\"microphone\", type=\"filepath\")\n",
1322
+ "\n",
1323
+ " gr.Markdown(\"## Transcribe the audio uploaded or recorded\")\n",
1324
+ " outputs_transcribe=gr.Textbox(label=\"Transcription\")\n",
1325
+ "\n",
1326
+ " file_input.change(fn=transcribe, inputs=file_input, outputs=outputs_transcribe)\n",
1327
+ " record_inputs.change(fn=transcribe, inputs=record_inputs, outputs=outputs_transcribe)\n",
1328
+ "\n",
1329
+ " #########################\n",
1330
+ " #######Evaluation########\n",
1331
+ " #########################\n",
1332
+ " with gr.Accordion(\"Evaluation section\"):\n",
1333
+ " gr.Markdown(\"## Evaluation\")\n",
1334
+ " with gr.Tab(\"General evalution\"):\n",
1335
+ " evalution=gr.Textbox(label=\"AI Evaluation\")\n",
1336
+ " btn5 = gr.Button(value=\"Evaluate\")\n",
1337
+ " btn5.click(ai_evaluate_v2, inputs=[outputs_context, outputs_transcribe, outputs_qa], outputs=evalution)\n",
1338
+ " with gr.Tab(\"Quantitative evalution\"):\n",
1339
+ " table_output = gr.Dataframe(label = \"Some kind of evaluation metrics?\")\n",
1340
+ " btn6 = gr.Button(value=\"Evaluate\")\n",
1341
+ " btn6.click(ai_evaluate_v2, inputs=[outputs_context, outputs_transcribe, outputs_qa], outputs=table_output)\n",
1342
+ "\n",
1343
+ "\n",
1344
+ " # Chatbot (https://gradio.app/creating-a-chatbot/)\n",
1345
+ " '''\n",
1346
+ " with gr.Blocks():\n",
1347
+ " gr.Markdown(\"\"\"\n",
1348
+ " ## Chat with the Model\n",
1349
+ " Click \"Display Prompt\" to display the premade or custom prompt that you created earlier. Then, continue chatting with the model.\n",
1350
+ " \"\"\")\n",
1351
+ " with gr.Row():\n",
1352
+ " show_prompt_block = gr.Button(\"Display Prompt\")\n",
1353
+ " '''\n",
1354
+ " gr.Markdown(\"## Chat with the Model\")\n",
1355
+ " with gr.Row(equal_height=True):\n",
1356
+ " with gr.Column(scale=2):\n",
1357
+ " chatbot = gr.Chatbot()\n",
1358
+ " with gr.Row():\n",
1359
+ " user_chat_input = gr.Textbox(label=\"User input\", scale=9)\n",
1360
+ " user_chat_input.submit(return_text, inputs=None, outputs=user_chat_input)\n",
1361
+ " user_chat_submit = gr.Button(\"Ask/answer model\", scale=1)\n",
1362
+ " #show_prompt_block.click(return_text, inputs=None, outputs=user_chat_input)\n",
1363
+ "\n",
1364
+ " # TODO Move the sources so it's displayed to the right of the chat bot,\n",
1365
+ " # with the sources taking up about 1/3rd of the horizontal space\n",
1366
+ " # with gr.Box(elem_id=\"sources-container\", scale=1):\n",
1367
+ " # # TODO: Display document sources in a nicer format?\n",
1368
+ " # gr.HTML(value=\"<h3 id='sources'>Sources</h3>\")\n",
1369
+ " # sources_output = []\n",
1370
+ " # for i in range(num_sources):\n",
1371
+ " # source_elem = gr.HTML(visible=False)\n",
1372
+ " # sources_output.append(source_elem)\n",
1373
+ "\n",
1374
+ "demo.launch()"
1375
+ ],
1376
+ "metadata": {
1377
+ "colab": {
1378
+ "base_uri": "https://localhost:8080/",
1379
+ "height": 616
1380
+ },
1381
+ "id": "Y7-3JFuZ8H5k",
1382
+ "outputId": "ea99ce65-7b79-4d39-dd88-44785b0d6615"
1383
+ },
1384
+ "execution_count": 24,
1385
+ "outputs": [
1386
+ {
1387
+ "output_type": "stream",
1388
+ "name": "stdout",
1389
+ "text": [
1390
+ "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n",
1391
+ "Note: opening Chrome Inspector may crash demo inside Colab notebooks.\n",
1392
+ "\n",
1393
+ "To create a public link, set `share=True` in `launch()`.\n"
1394
+ ]
1395
+ },
1396
+ {
1397
+ "output_type": "display_data",
1398
+ "data": {
1399
+ "text/plain": [
1400
+ "<IPython.core.display.Javascript object>"
1401
+ ],
1402
+ "application/javascript": [
1403
+ "(async (port, path, width, height, cache, element) => {\n",
1404
+ " if (!google.colab.kernel.accessAllowed && !cache) {\n",
1405
+ " return;\n",
1406
+ " }\n",
1407
+ " element.appendChild(document.createTextNode(''));\n",
1408
+ " const url = await google.colab.kernel.proxyPort(port, {cache});\n",
1409
+ "\n",
1410
+ " const external_link = document.createElement('div');\n",
1411
+ " external_link.innerHTML = `\n",
1412
+ " <div style=\"font-family: monospace; margin-bottom: 0.5rem\">\n",
1413
+ " Running on <a href=${new URL(path, url).toString()} target=\"_blank\">\n",
1414
+ " https://localhost:${port}${path}\n",
1415
+ " </a>\n",
1416
+ " </div>\n",
1417
+ " `;\n",
1418
+ " element.appendChild(external_link);\n",
1419
+ "\n",
1420
+ " const iframe = document.createElement('iframe');\n",
1421
+ " iframe.src = new URL(path, url).toString();\n",
1422
+ " iframe.height = height;\n",
1423
+ " iframe.allow = \"autoplay; camera; microphone; clipboard-read; clipboard-write;\"\n",
1424
+ " iframe.width = width;\n",
1425
+ " iframe.style.border = 0;\n",
1426
+ " element.appendChild(iframe);\n",
1427
+ " })(7864, \"/\", \"100%\", 500, false, window.element)"
1428
+ ]
1429
+ },
1430
+ "metadata": {}
1431
+ },
1432
+ {
1433
+ "output_type": "execute_result",
1434
+ "data": {
1435
+ "text/plain": []
1436
+ },
1437
+ "metadata": {},
1438
+ "execution_count": 24
1439
+ }
1440
+ ]
1441
+ },
1442
+ {
1443
+ "cell_type": "markdown",
1444
+ "source": [
1445
+ "### What's left\n",
1446
+ "- vector store (link) upload\n",
1447
+ "- submit question section need to be linked with ai_evaluate function"
1448
+ ],
1449
+ "metadata": {
1450
+ "id": "g2EVIogW69Fd"
1451
+ }
1452
+ }
1453
+ ]
1454
+ }
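For the first open item above ("vector store (link) upload"), one possible shape is sketched below. This is a hypothetical sketch, not something wired into the app yet: it assumes LangChain's Chroma wrapper and OpenAI embeddings, and it reuses `process_file` plus an `uploaded_files` variable purely as placeholders.

```python
# Hypothetical sketch: build a vector store from the uploaded context documents
# and expose a retriever that the question-generation / evaluation steps could use.
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

def build_vector_store(raw_text):
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_text(raw_text)
    return Chroma.from_texts(chunks, OpenAIEmbeddings())

# db = build_vector_store(process_file(uploaded_files))  # uploaded_files is a placeholder
# retriever = db.as_retriever()
```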
lo-achievement/UI_design_oral_exam_chatbot.ipynb ADDED
@@ -0,0 +1,1004 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "id": "view-in-github",
7
+ "colab_type": "text"
8
+ },
9
+ "source": [
10
+ "<a href=\"https://colab.research.google.com/github/vanderbilt-data-science/lo-achievement/blob/124-implement-baseline-functionality-for-oral-exam-module/UI_design_oral_exam_chatbot.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "markdown",
15
+ "metadata": {
16
+ "id": "PIbogPXyM0wr"
17
+ },
18
+ "source": [
19
+ "# Project IO Achievement - UI Design (Oral Exam)"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "markdown",
24
+ "metadata": {
25
+ "id": "x_Vp8SiKM4p1"
26
+ },
27
+ "source": [
28
+ "## Problem Definition\n",
29
+ "\n",
30
+ "The v1 functionality for the Oral Exam module requires the following:\n",
31
+ "\n",
32
+ "1. Upload or generation of questions: either the user should upload a set of questions or we should allow the model to generate the questions. The user should pick or it should be inherent if there is no upload of questions. Note that we must also allow for context to be uploaded (vector store, vector store link, specific documents)\n",
33
+ "2. The model should prompt the user with a question and pause.\n",
34
+ "The user should respond by audio.\n",
35
+ "3. This should continue on until some final point where the exam is over.\n",
36
+ "\n",
37
+ "Then:\n",
38
+ "\n",
39
+ "1. We should use Whisper to do the transcription, and\n",
40
+ "2. Send the transcription, questions, and context for GPT4 for evaluation\n",
41
+ "Return the evaluation.\n",
42
+ "3. This will primarily be work on a user interface."
43
+ ]
44
+ },
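In outline, the two "Then" steps above reduce to two API calls. The sketch below is only a rough illustration: the audio path, context, and questions are placeholders, and it assumes the same `openai` client that this notebook installs, with the API key already set.

```python
import openai  # assumes openai.api_key has already been set (see the API Keys section)

context = "..."    # placeholder: uploaded course material
questions = "..."  # placeholder: uploaded or generated exam questions

# 1. Transcribe the student's spoken answer with Whisper
#    ("exam_answer.mp3" is a placeholder path)
with open("exam_answer.mp3", "rb") as audio_file:
    transcript = openai.Audio.transcribe("whisper-1", audio_file)["text"]

# 2. Send the transcription, questions, and context to GPT-4 for evaluation
evaluation = openai.ChatCompletion.create(
    model="gpt-4",
    messages=[{
        "role": "user",
        "content": f"Context: {context}\n\nQuestions: {questions}\n\n"
                   f"Student answer: {transcript}\n\n"
                   "Please evaluate the student's answers and return the evaluation.",
    }],
)
print(evaluation.choices[0].message["content"])
```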
45
+ {
46
+ "cell_type": "markdown",
47
+ "metadata": {
48
+ "id": "o_60X8H3NEne"
49
+ },
50
+ "source": [
51
+ "## Libraries\n",
52
+ "\n",
53
+ "This section will install and import some important libraries such as Langchain, openai, Gradio, and so on"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": 1,
59
+ "metadata": {
60
+ "id": "pxcqXgg2aAN7"
61
+ },
62
+ "outputs": [],
63
+ "source": [
64
+ "# install libraries here\n",
65
+ "# -q flag for \"quiet\" install\n",
66
+ "%%capture\n",
67
+ "!pip install -q langchain\n",
68
+ "!pip install -q openai\n",
69
+ "!pip install -q gradio\n",
70
+ "# !pip install -q datasets\n",
71
+ "!pip install -q torchaudio\n",
72
+ "!pip install -q git+https://github.com/openai/whisper.git\n",
73
+ "!pip install -q docx\n",
74
+ "!pip install -q PyPDF2\n",
75
+ "!pip install -q python-docx"
76
+ ]
77
+ },
78
+ {
79
+ "cell_type": "code",
80
+ "execution_count": 2,
81
+ "metadata": {
82
+ "id": "pEjM1tLsMZBq"
83
+ },
84
+ "outputs": [],
85
+ "source": [
86
+ "# import libraries here\n",
87
+ "from langchain.llms import OpenAI\n",
88
+ "from langchain.prompts import PromptTemplate\n",
89
+ "from langchain.document_loaders import TextLoader\n",
90
+ "from langchain.indexes import VectorstoreIndexCreator\n",
91
+ "from langchain import ConversationChain, LLMChain, PromptTemplate\n",
92
+ "from langchain.chat_models import ChatOpenAI\n",
93
+ "from langchain.memory import ConversationBufferWindowMemory\n",
94
+ "from langchain.prompts import ChatPromptTemplate\n",
95
+ "from langchain.text_splitter import CharacterTextSplitter\n",
96
+ "from langchain.embeddings import OpenAIEmbeddings\n",
97
+ "import openai\n",
98
+ "import os\n",
99
+ "from getpass import getpass\n",
100
+ "# from IPython.display import display, Javascript, HTML\n",
101
+ "# from google.colab.output import eval_js\n",
102
+ "# from base64 import b64decode\n",
103
+ "# import ipywidgets as widgets\n",
104
+ "# from IPython.display import clear_output\n",
105
+ "import time\n",
106
+ "import requests\n",
107
+ "# from datasets import load_dataset\n",
108
+ "# from torchaudio.transforms import Resample\n",
109
+ "import whisper\n",
110
+ "import numpy as np\n",
111
+ "import torch\n",
112
+ "import librosa\n",
113
+ "# from datasets import load_dataset\n",
114
+ "#from jiwer import wer\n",
115
+ "import pandas as pd\n",
116
+ "import gradio as gr\n",
117
+ "from docx import Document\n",
118
+ "import PyPDF2\n",
119
+ "from pydub import AudioSegment\n",
120
+ "import tempfile"
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "markdown",
125
+ "metadata": {
126
+ "id": "03KLZGI_a5W5"
127
+ },
128
+ "source": [
129
+ "## API Keys\n",
130
+ "\n",
131
+ "Use these cells to load the API keys required for this notebook. The below code cell uses the `getpass` library."
132
+ ]
133
+ },
134
+ {
135
+ "cell_type": "code",
136
+ "execution_count": 3,
137
+ "metadata": {
138
+ "colab": {
139
+ "base_uri": "https://localhost:8080/"
140
+ },
141
+ "id": "5smcWj4DbFgy",
142
+ "outputId": "6bc91507-cd3c-4808-8976-811d7fc7cb29"
143
+ },
144
+ "outputs": [
145
+ {
146
+ "name": "stdout",
147
+ "output_type": "stream",
148
+ "text": [
149
+ "··········\n"
150
+ ]
151
+ }
152
+ ],
153
+ "source": [
154
+ "openai_api_key = getpass()\n",
155
+ "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n",
156
+ "openai.api_key = openai_api_key"
157
+ ]
158
+ },
159
+ {
160
+ "cell_type": "markdown",
161
+ "metadata": {
162
+ "id": "pMo9x8u4AEV1"
163
+ },
164
+ "source": [
165
+ "## Prompt Design"
166
+ ]
167
+ },
168
+ {
169
+ "cell_type": "code",
170
+ "execution_count": 4,
171
+ "metadata": {
172
+ "colab": {
173
+ "base_uri": "https://localhost:8080/"
174
+ },
175
+ "id": "UgnCZRMhADvo",
176
+ "outputId": "462e62c7-a618-4549-e651-858514757235"
177
+ },
178
+ "outputs": [
179
+ {
180
+ "output_type": "execute_result",
181
+ "data": {
182
+ "text/plain": [
183
+ "ChatOpenAI(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-4', temperature=0.0, model_kwargs={}, openai_api_key='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', openai_api_base='', openai_organization='', openai_proxy='', request_timeout=None, max_retries=6, streaming=False, n=1, max_tokens=None, tiktoken_model_name=None)"
184
+ ]
185
+ },
186
+ "metadata": {},
187
+ "execution_count": 4
188
+ }
189
+ ],
190
+ "source": [
191
+ "chat = ChatOpenAI(temperature=0.0, model_name='gpt-4')\n",
192
+ "chat"
193
+ ]
194
+ },
195
+ {
196
+ "cell_type": "markdown",
197
+ "source": [
198
+ "### Chatbot Prompts"
199
+ ],
200
+ "metadata": {
201
+ "id": "2tTNiyU-ZcDU"
202
+ }
203
+ },
204
+ {
205
+ "cell_type": "code",
206
+ "execution_count": 15,
207
+ "metadata": {
208
+ "colab": {
209
+ "base_uri": "https://localhost:8080/"
210
+ },
211
+ "id": "r-VmK_7vHrmw",
212
+ "outputId": "8c314a8f-dad5-47b9-ddba-f6009a73b80d"
213
+ },
214
+ "outputs": [
215
+ {
216
+ "output_type": "execute_result",
217
+ "data": {
218
+ "text/plain": [
219
+ "['history', 'input', 'instruction', 'questions']"
220
+ ]
221
+ },
222
+ "metadata": {},
223
+ "execution_count": 15
224
+ }
225
+ ],
226
+ "source": [
227
+ "template_string3 = \"\"\"\n",
228
+ "Please ask me the following questions in sequence, and after I provide the answer, \\\n",
229
+ "please give me some feedback. Here is the instruction for feedback: {instruction}. If no instruction is provided, please provide feedback based on your judgement. \\\n",
230
+ "Just ask me the question, and please do not show any other text (no need for greetings for example) \\\n",
231
+ "Here are the questions that you can will me: {questions}. \\\n",
232
+ "Here are the chat history: {history}. \\\n",
233
+ "{input}\n",
234
+ "\n",
235
+ "Once all questions are answered, thank the user and give overall feedback for the question answering part.\n",
236
+ "\"\"\"\n",
237
+ "prompt_template3 = ChatPromptTemplate.from_template(template_string3)\n",
238
+ "prompt_template3.messages[0].prompt.input_variables"
239
+ ]
240
+ },
241
+ {
242
+ "cell_type": "code",
243
+ "execution_count": 16,
244
+ "metadata": {
245
+ "colab": {
246
+ "base_uri": "https://localhost:8080/",
247
+ "height": 72
248
+ },
249
+ "id": "XaK7D5B4bMYv",
250
+ "outputId": "55f495b5-d8d0-4a80-b5c4-128dba34eebe"
251
+ },
252
+ "outputs": [
253
+ {
254
+ "output_type": "execute_result",
255
+ "data": {
256
+ "text/plain": [
257
+ "'\\nPlease ask me the following questions in sequence, and after I provide the answer, please give me some feedback. Here is the instruction for feedback: {instruction}. If no instruction is provided, please provide feedback based on your judgement. Just ask me the question, and please do not show any other text (no need for greetings for example) Here are the questions that you can will me: {questions}. Here are the chat history: {history}. {input}\\n\\nOnce all questions are answered, thank the user and give overall feedback for the question answering part.\\n'"
258
+ ],
259
+ "application/vnd.google.colaboratory.intrinsic+json": {
260
+ "type": "string"
261
+ }
262
+ },
263
+ "metadata": {},
264
+ "execution_count": 16
265
+ }
266
+ ],
267
+ "source": [
268
+ "prompt_template3.messages[0].prompt.template"
269
+ ]
270
+ },
271
+ {
272
+ "cell_type": "markdown",
273
+ "metadata": {
274
+ "id": "l4o8R5eUE1n8"
275
+ },
276
+ "source": [
277
+ "### Functions"
278
+ ]
279
+ },
280
+ {
281
+ "cell_type": "code",
282
+ "execution_count": 7,
283
+ "metadata": {
284
+ "id": "ABN0X9xQHeii"
285
+ },
286
+ "outputs": [],
287
+ "source": [
288
+ "def embed_key(openai_api_key):\n",
289
+ " os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n",
290
+ "\n",
291
+ "def transcribe(audio_file_path):\n",
292
+ " try:\n",
293
+ " with open(audio_file_path, \"rb\") as audio_file:\n",
294
+ " # Call OpenAI's Whisper model for transcription\n",
295
+ " transcript = openai.Audio.transcribe(\"whisper-1\", audio_file)\n",
296
+ " transcribed_text = transcript[\"text\"]\n",
297
+ " return transcribed_text\n",
298
+ " except:\n",
299
+ " return \"Your answer will be transcribed here\"\n",
300
+ "\n",
301
+ "def process_file(files):\n",
302
+ " for file in files:\n",
303
+ " try:\n",
304
+ " extension = file.name.split('.')[-1].lower()\n",
305
+ " if extension == 'docx':\n",
306
+ " doc = Document(file.name)\n",
307
+ " full_text = []\n",
308
+ " for paragraph in doc.paragraphs:\n",
309
+ " full_text.append(paragraph.text)\n",
310
+ " return '\\n'.join(full_text)\n",
311
+ "\n",
312
+ " elif extension == 'pdf':\n",
313
+ " pdf_file = open(file.name, 'rb')\n",
314
+ " reader = PyPDF2.PdfReader(pdf_file)\n",
315
+ " num_pages = len(reader.pages)\n",
316
+ " full_text = []\n",
317
+ " for page in range(num_pages):\n",
318
+ " page_obj = reader.pages[page]\n",
319
+ " full_text.append(page_obj.extract_text())\n",
320
+ " pdf_file.close()\n",
321
+ " return '\\n'.join(full_text)\n",
322
+ "\n",
323
+ " elif extension == 'txt':\n",
324
+ " with open(file.name, 'r') as txt_file:\n",
325
+ " full_text = txt_file.read()\n",
326
+ " return full_text\n",
327
+ "\n",
328
+ " else:\n",
329
+ " return \"Unsupported file type\"\n",
330
+ " except FileNotFoundError:\n",
331
+ " return \"File not found\"\n",
332
+ " except PermissionError:\n",
333
+ " return \"Permission denied\"\n",
334
+ "\n",
335
+ "def generate_questions(text, prompt):\n",
336
+ " test_input1 = question_template.format_messages(\n",
337
+ " context = text,\n",
338
+ " pre_prompt = prompt)\n",
339
+ "\n",
340
+ " response = chat(test_input1)\n",
341
+ " return response.content\n",
342
+ "\n",
343
+ "\n",
344
+ "def ai_evaluate(context, audio_transcript, QA, instructions):\n",
345
+ " test_input1 = evaluate_template.format_messages(\n",
346
+ " context = context,\n",
347
+ " transcript = audio_transcript,\n",
348
+ " QA = QA,\n",
349
+ " instructions = instructions)\n",
350
+ "\n",
351
+ " response = chat(test_input1)\n",
352
+ " return response.content\n",
353
+ "\n",
354
+ "def upload_file(files):\n",
355
+ " file_paths = [file.name for file in files]\n",
356
+ " return file_paths\n",
357
+ "\n",
358
+ "def use_these_questions(input):\n",
359
+ " return input\n",
360
+ "\n",
361
+ "################################\n",
362
+ "\n",
363
+ "def add_text(history, text, prompt = template_string3):\n",
364
+ " new_history = [(prompt, None)] + history + [(text, None)]\n",
365
+ " return new_history, gr.update(value=\"\", interactive=False)\n",
366
+ "\n",
367
+ "# def add_file(history, file):\n",
368
+ "# history = history + [((file.name,), None)]\n",
369
+ "# return history\n",
370
+ "\n",
371
+ "\n",
372
+ "def bot_initialize(input, instruction_feedback, questions_used, history):\n",
373
+ "\n",
374
+ " template_string3 = \"\"\"\n",
375
+ " Please ask me the following questions in sequence, and after I provide the answer, \\\n",
376
+ " please give me some feedback. Here is the instruction for feedback: {instruction}. If no instruction is provided, please provide feedback based on your judgement. \\\n",
377
+ " Here are the questions that you can ask me: {questions}. \\\n",
378
+ " Here are the chat history: {history}. \\\n",
379
+ " {input} \\\n",
380
+ "\n",
381
+ " *** Remember, just ask me the question, give feedbacks, and ask the next questions. Do not forget to ask the next question after feedbacks. \\\n",
382
+ " \"\"\"\n",
383
+ " prompt_template3 = ChatPromptTemplate.from_template(template_string3)\n",
384
+ "\n",
385
+ " test_input1 = prompt_template3.format_messages(\n",
386
+ " instruction = instruction_feedback,\n",
387
+ " history = history,\n",
388
+ " questions = questions_used,\n",
389
+ " input = input)\n",
390
+ "\n",
391
+ " response = chat(test_input1)\n",
392
+ " return response.content\n",
393
+ "\n",
394
+ "# def initialize(instruction_feedback, questions_used, chat_history, ready):\n",
395
+ "# test_input1 = prompt_template3.format_messages(\n",
396
+ "# instruction = instruction_feedback,\n",
397
+ "# chat_history = chat_history,\n",
398
+ "# questions = questions_used,\n",
399
+ "# ready = ready)\n",
400
+ "# response = chat(test_input1)\n",
401
+ "# return response.content\n",
402
+ "\n",
403
+ "# def bot(history):\n",
404
+ "# response = \"**That's cool!**\"\n",
405
+ "# history[-1][1] = \"\"\n",
406
+ "# for character in response:\n",
407
+ "# history[-1][1] += character\n",
408
+ "# time.sleep(0.05)\n",
409
+ "# yield history\n",
410
+ "\n",
411
+ "def message_and_history(input, instruction_feedback, questions_used, history):\n",
412
+ " history = history or []\n",
413
+ " s = list(sum(history, ()))\n",
414
+ " s.append(input)\n",
415
+ " inp = ' '.join(s)\n",
416
+ " output = bot_initialize(inp, instruction_feedback, questions_used, history)\n",
417
+ " history.append((input, output))\n",
418
+ " return history, history\n",
419
+ "\n",
420
+ "def prompt_select(selection, number, length):\n",
421
+ " if selection == \"Random\":\n",
422
+ " prompt = f\"Please design a {number} question quiz based on the context provided and the inputted learning objectives (if applicable).\"\n",
423
+ " elif selection == \"Fill in the Blank\":\n",
424
+ " prompt = f\"Create a {number} question fill in the blank quiz refrencing the context provided. The quiz should reflect the learning objectives (if inputted). The 'blank' part of the question should appear as '________'. The answers should reflect what word(s) should go in the blank an accurate statement. An example is the follow: 'The author of the article is ______.' The question should be a statement.\"\n",
425
+ " elif selection == \"Short Answer\":\n",
426
+ " prompt = f\"Please design a {number} question quiz about which reflects the learning objectives (if inputted). The questions should be short answer. Expect the correct answers to be {length} sentences long.\"\n",
427
+ " else:\n",
428
+ " prompt = f\"Please design a {number} question {selection.lower()} quiz based on the context provided and the inputted learning objectives (if applicable).\"\n",
429
+ " return prompt\n",
430
+ "\n",
431
+ "# def prompt_select(selection, number, length):\n",
432
+ "# if selection == \"Random\":\n",
433
+ "# prompt = f\"Please design a {number} question quiz based on the context provided and the inputted learning objectives (if applicable). The types of questions should be randomized (including multiple choice, short answer, true/false, short answer, etc.). Provide one question at a time, and wait for my response before providing me with feedback. Again, while the quiz may ask for multiple questions, you should only provide 1 question in you initial response. Do not include the answer in your response. If I get an answer wrong, provide me with an explanation of why it was incorrect, and then give me additional chances to respond until I get the correct choice. Explain why the correct choice is right.\"\n",
434
+ "# elif selection == \"Fill in the Blank\":\n",
435
+ "# prompt = f\"Create a {number} question fill in the blank quiz refrencing the context provided. The quiz should reflect the learning objectives (if inputted). The 'blank' part of the question should appear as '________'. The answers should reflect what word(s) should go in the blank an accurate statement. An example is the follow: 'The author of the article is ______.' The question should be a statement. Provide one question at a time, and wait for my response before providing me with feedback. Again, while the quiz may ask for multiple questions, you should only provide ONE question in you initial response. Do not include the answer in your response. If I get an answer wrong, provide me with an explanation of why it was incorrect,and then give me additional chances to respond until I get the correct choice. Explain why the correct choice is right.\"\n",
436
+ "# elif selection == \"Short Answer\":\n",
437
+ "# prompt = f\"Please design a {number} question quiz about which reflects the learning objectives (if inputted). The questions should be short answer. Expect the correct answers to be {length} sentences long. Provide one question at a time, and wait for my response before providing me with feedback. Again, while the quiz may ask for multiple questions, you should only provide ONE question in you initial response. Do not include the answer in your response. If I get an answer wrong, provide me with an explanation of why it was incorrect, and then give me additional chances to respond until I get the correct choice. Explain why the correct answer is right.\"\n",
438
+ "# else:\n",
439
+ "# prompt = f\"Please design a {number} question {selection.lower()} quiz based on the context provided and the inputted learning objectives (if applicable). Provide one question at a time, and wait for my response before providing me with feedback. Again, while the quiz may ask for multiple questions, you should only provide 1 question in you initial response. Do not include the answer in your response. If I get an answer wrong, provide me with an explanation of why it was incorrect, and then give me additional chances to respond until I get the correct choice. Explain why the correct choice is right.\"\n",
440
+ "# return prompt"
441
+ ]
442
+ },
443
+ {
444
+ "cell_type": "markdown",
445
+ "metadata": {
446
+ "id": "8PzIpcfg4-X0"
447
+ },
448
+ "source": [
449
+ "## Integrate Prompts from LO project"
450
+ ]
451
+ },
452
+ {
453
+ "cell_type": "markdown",
454
+ "metadata": {
455
+ "id": "E5vWxcm25EAC"
456
+ },
457
+ "source": [
458
+ "### Creating a Chain for Short Answer Generation"
459
+ ]
460
+ },
461
+ {
462
+ "cell_type": "markdown",
463
+ "metadata": {
464
+ "id": "y95FExV-5IqI"
465
+ },
466
+ "source": [
467
+ "In this example, the context would include the poem \"The Road Not Taken\" by Robert Frost"
468
+ ]
469
+ },
470
+ {
471
+ "cell_type": "code",
472
+ "execution_count": 8,
473
+ "metadata": {
474
+ "id": "j_qEXDWQ5RSW"
475
+ },
476
+ "outputs": [],
477
+ "source": [
478
+ "# This is what I used to test the function 'generate_questions_v2'\n",
479
+ "template_string = \"\"\"\n",
480
+ "You are a world-class tutor helping students to perform better on oral and written exams though interactive experiences.\"\n",
481
+ "\n",
482
+ "The following text should be used as the basis for the instructions which follow: {context} \\\n",
483
+ "\n",
484
+ "The following is the guideline for generating the questiion: {pre_prompt} \\\n",
485
+ "\n",
486
+ "The output should be formatted as following:\n",
487
+ "\n",
488
+ "Question 1: ...\n",
489
+ "Question 2: ...\n",
490
+ "Question 3: ...\n",
491
+ "...\n",
492
+ "\"\"\""
493
+ ]
494
+ },
495
+ {
496
+ "cell_type": "code",
497
+ "execution_count": 9,
498
+ "metadata": {
499
+ "colab": {
500
+ "base_uri": "https://localhost:8080/"
501
+ },
502
+ "id": "yWOM1XdC5UhQ",
503
+ "outputId": "69d781f7-fb0c-4dde-9085-ddb9908b82af"
504
+ },
505
+ "outputs": [
506
+ {
507
+ "output_type": "execute_result",
508
+ "data": {
509
+ "text/plain": [
510
+ "['context', 'pre_prompt']"
511
+ ]
512
+ },
513
+ "metadata": {},
514
+ "execution_count": 9
515
+ }
516
+ ],
517
+ "source": [
518
+ "question_template = ChatPromptTemplate.from_template(template_string)\n",
519
+ "question_template.messages[0].prompt.input_variables"
520
+ ]
521
+ },
522
+ {
523
+ "cell_type": "code",
524
+ "execution_count": 10,
525
+ "metadata": {
526
+ "id": "4Mc1ZC3jaydQ"
527
+ },
528
+ "outputs": [],
529
+ "source": [
530
+ "# @title\n",
531
+ "con = \"\"\" Two roads diverged in a yellow wood,\n",
532
+ "And sorry I could not travel both\n",
533
+ "And be one traveler, long I stood\n",
534
+ "And looked down one as far as I could\n",
535
+ "To where it bent in the undergrowth;\n",
536
+ "Then took the other, as just as fair,\n",
537
+ "And having perhaps the better claim,\n",
538
+ "Because it was grassy and wanted wear;\n",
539
+ "Though as for that the passing there\n",
540
+ "Had worn them really about the same,\n",
541
+ "And both that morning equally lay\n",
542
+ "In leaves no step had trodden black.\n",
543
+ "Oh, I kept the first for another day!\n",
544
+ "Yet knowing how way leads on to way,\n",
545
+ "I doubted if I should ever come back.\n",
546
+ "I shall be telling this with a sigh\n",
547
+ "Somewhere ages and ages hence:\n",
548
+ "Two roads diverged in a wood, and I—\n",
549
+ "I took the one less traveled by,\n",
550
+ "And that has made all the difference.\n",
551
+ "—-Robert Frost—-\n",
552
+ "Education Place: http://www.eduplace.com \"\"\"\n",
553
+ "\n",
554
+ "pre = \"Please design a 3 question quiz about which reflects the learning objectives (if inputted). The questions should be short answer. Expect the correct answers to be sentences long. Provide one question at a time, and wait for my response before providing me with feedback. Again, while the quiz may ask for multiple questions, you should only provide ONE question in you initial response. Do not include the answer in your response. If I get an answer wrong, provide me with an explanation of why it was incorrect, and then give me additional chances to respond until I get the correct choice. Explain why the correct answer is right.\"\n"
555
+ ]
556
+ },
557
+ {
558
+ "cell_type": "code",
559
+ "execution_count": 11,
560
+ "metadata": {
561
+ "colab": {
562
+ "base_uri": "https://localhost:8080/",
563
+ "height": 36
564
+ },
565
+ "id": "KXssPFyEbG3f",
566
+ "outputId": "8a5389f7-3ec6-4332-d9ae-f45ff5781caf"
567
+ },
568
+ "outputs": [
569
+ {
570
+ "output_type": "execute_result",
571
+ "data": {
572
+ "text/plain": [
573
+ "'Question 1: What is the main theme of Robert Frost\\'s poem \"The Road Not Taken\"?'"
574
+ ],
575
+ "application/vnd.google.colaboratory.intrinsic+json": {
576
+ "type": "string"
577
+ }
578
+ },
579
+ "metadata": {},
580
+ "execution_count": 11
581
+ }
582
+ ],
583
+ "source": [
584
+ "generate_questions(con,pre)"
585
+ ]
586
+ },
587
+ {
588
+ "cell_type": "markdown",
589
+ "metadata": {
590
+ "id": "DMTybR3PVuoC"
591
+ },
592
+ "source": [
593
+ "### Creating a Chain for AI Evaluation"
594
+ ]
595
+ },
596
+ {
597
+ "cell_type": "code",
598
+ "execution_count": 12,
599
+ "metadata": {
600
+ "id": "Wc-3XAFQVxO_"
601
+ },
602
+ "outputs": [],
603
+ "source": [
604
+ "template_evaluation = \"\"\"\n",
605
+ "Given\n",
606
+ "1. The follwing context of the oral exam/presentation: {context} \\\n",
607
+ "\n",
608
+ "2. The answer from the student: {transcript} \\\n",
609
+ "\n",
610
+ "3. The Questions asked to the student and student answers {QA} \\\n",
611
+ "\n",
612
+ "Please evaluate the students performance based on {instructions} \\\n",
613
+ "\n",
614
+ "If no instruction is provided, you can evaluate based on your judgement of the students performance.\n",
615
+ "\n",
616
+ "\"\"\""
617
+ ]
618
+ },
619
+ {
620
+ "cell_type": "code",
621
+ "execution_count": 13,
622
+ "metadata": {
623
+ "colab": {
624
+ "base_uri": "https://localhost:8080/"
625
+ },
626
+ "id": "FZXeYNSVVy5g",
627
+ "outputId": "94a1a4be-03a6-4c20-97b8-4333910991fa"
628
+ },
629
+ "outputs": [
630
+ {
631
+ "output_type": "execute_result",
632
+ "data": {
633
+ "text/plain": [
634
+ "['QA', 'context', 'instructions', 'transcript']"
635
+ ]
636
+ },
637
+ "metadata": {},
638
+ "execution_count": 13
639
+ }
640
+ ],
641
+ "source": [
642
+ "# @title\n",
643
+ "evaluate_template = ChatPromptTemplate.from_template(template_evaluation)\n",
644
+ "evaluate_template.messages[0].prompt.input_variables"
645
+ ]
646
+ },
647
+ {
648
+ "cell_type": "markdown",
649
+ "metadata": {
650
+ "id": "a3WUL_hFyMkr"
651
+ },
652
+ "source": [
653
+ "### Test process_file"
654
+ ]
655
+ },
656
+ {
657
+ "cell_type": "code",
658
+ "execution_count": null,
659
+ "metadata": {
660
+ "colab": {
661
+ "base_uri": "https://localhost:8080/",
662
+ "height": 157
663
+ },
664
+ "id": "LJX1AKTMyVm8",
665
+ "outputId": "24f8de7a-f456-47bc-b3b5-0284bbd7076f"
666
+ },
667
+ "outputs": [
668
+ {
669
+ "data": {
670
+ "application/vnd.google.colaboratory.intrinsic+json": {
671
+ "type": "string"
672
+ },
673
+ "text/plain": [
674
+ "\"\\ufeffHello,\\n\\n\\nWe are so excited for this semester’s partnership with Data Science Institute and Next Steps at Vanderbilt. Jonathan Wade will be interning Mondays and Wednesdays 2-5 pm and Fridays 12-4 pm starting Monday, January 31st. Jessica will be job coaching on Mondays and Wednesdays from 2-5 pm. It also used to be on Fridays from 2-4 pm but not anymore.\\n\\n\\nBelow is important information and reminders:\\n\\n\\nLocation: 1400 18th Ave S Building, Suite 2000, Nashville, TN 37212\\n\\n\\nDress: Business casual attire (This includes items such as dress pants, khakis, polos and dress shirts)\\n\\n\\nImportant Dates:\\n\\n\\nVanderbilt University is off for Spring Break March 5th - March 13th. No internships will take place these days.\\n\\n\\nAll internships end by Thursday, April 28th.\\n\\n\\nImportant COVID-19 Information: Attached is a document outlining the COVID-19 guidelines all Vanderbilt University students must follow, including Next Steps interns and job coaches while at internship sites. Please note these may change given the evolving nature of the pandemic and any changes will be communicated to internship sites, interns, and job coaches as needed.\\n\\n\\nCareer Development Resource Guide: I am also attaching the Next Steps Career Development Resource guide that outlines student expectations and provides helpful information and resources for site supervisors.\\n\\n\\nInternship Coordinator: Lynda Tricia is the coordinator for this internship and is the main point of contact for any questions or concerns that arise.\\n\\n\\nFinally, below you will find everyone's contact information for your convenience.\\n\\n\\nContacts:\\n\\n\\nIntern: Jonathan Wade, [email protected], 613-472-3867\\n\\n\\nSupervisor: Ruben Miller, [email protected], 216-574-3176\\n\\n\\nJob Coach: Jessica Cho, [email protected], 615-999-1134\\n\\n\\nInternship Coordinator: Lynda Tricia, [email protected], 606-415-9999\\n\\n\\nPlease let us know if there are any questions. Thank you!\\n\\n\\n\\n\\nNext Steps at Vanderbilt - Safety Guidelines for Internships\\nMore information on Vanderbilt’s Health and Safety Protocols can be found here: https://www.vanderbilt.edu/coronavirus/community/undergraduate-students/\\nMasks: All Next Steps interns and job coaches must wear masks indoors at internships even if the jobsite does not require masks.\\nInterns and job coaches should have a well-fitted mask that completely covers your nose and mouth, preferably a KN95, KF94 or FFP2 version.\\nLunch Breaks: If an intern or job coach needs a lunch break, they can remove their mask just when they eat or drink. They must be physically distanced from other co-workers when eating. \\nSymptom Monitoring: All interns and job coaches must be free of ANY symptoms related to COVID-19 to go to the internship. If an intern or job coach has symptoms, they should stay home, notify the Next Steps staff and internship supervisor, and get tested at Vanderbilt Student Health or with their medical provider.\\nAccording to the CDC, symptoms may appear 2 to 14 days after exposure to the virus. 
These include:\\n* Fever or chills\\n* Cough\\n* Shortness of breath or difficulty breathing\\n* Fatigue\\n* Muscle or body aches\\n* Headache\\n* New loss of taste or smell\\n* Sore throat\\n* Congestion or runny nose\\n* Nausea or vomiting\\n* Diarrhea\\n\\n\\nIf an intern or job coach tests positive: If an intern or job coach receives a COVID-19 positive test result, regardless of vaccination status, they should complete the following webform. The webform goes directly to the Vanderbilt Command Center.\\nThe intern or job coach will receive direct communication from the Command Center about their isolation (if they tested positive) or quarantine period (if considered a close contact) and will be instructed to contact Student Health or Occupational Health if they develop symptoms.\\nClose Contact/Quarantine: Interns or job coaches who are a close contact of someone who tests positive should complete the following webform. The webform goes directly to the Command Center.\\n* Close contacts who are unvaccinated will quarantine for 10 days based on CDC guidance.\\no Additional requirements are in place for days 10 to 14 following exposure including:\\n* For days 10 to 14 after last exposure, unvaccinated Vanderbilt community members identified as close contacts must not unmask at any time in public.\\n* Individuals should eat alone or complete any activities alone that require removing a mask in a private space during those four days between day 10-14.\\n* Close contacts who are vaccinated and asymptomatic will not have to quarantine but are recommended to monitor their symptoms and to get a COVID-19 test 5-7 days after last exposure. If asymptomatic, testing can be done at the VU testing center. If individuals develop symptoms, they should test at Student Health, Occupational Health or VUMC or other testing location in the community.\\n* Close contacts who are vaccinated and symptomatic may have to quarantine based on severity of symptoms and specific living situations. This determination will be made by their medical provider in consultation with the Command Center.\\nJob Coaching Supports: If a job coach tests positive and is unable to provide on-site supports, Next Steps staff will follow the procedures outlined below.\\n1. Identify another job coach or Next Steps staff member who can provide job coaching on-site during some of the student’s internship hours.\\n2. Identify another job coach or staff member who can check-in virtually with the student and supervisor during their shift. \\n3. Or, work with the student and supervisor to ensure natural supports are in place if the student must work without support of a job coach during that time period.\""
675
+ ]
676
+ },
677
+ "execution_count": 69,
678
+ "metadata": {},
679
+ "output_type": "execute_result"
680
+ }
681
+ ],
682
+ "source": [
683
+ "# Might need some way to make pdf file to load more readable\n",
684
+ "# process_file('/content/instrutor_note.docx')\n",
685
+ "# process_file('/content/Big Data & Economics.pdf')\n",
686
+ "# process_file('/content/Big Data & Economics.pdf')\n",
687
+ "process_file('/content/Anonymized Job Coach QA Test Doc (1).txt')"
688
+ ]
689
+ },
690
+ {
691
+ "cell_type": "markdown",
692
+ "metadata": {
693
+ "id": "M6IzVTjz5cex"
694
+ },
695
+ "source": [
696
+ "## UI Design\n"
697
+ ]
698
+ },
699
+ {
700
+ "cell_type": "markdown",
701
+ "metadata": {
702
+ "id": "u2SY4Akt_t8h"
703
+ },
704
+ "source": [
705
+ "### Chatbot V2"
706
+ ]
707
+ },
708
+ {
709
+ "cell_type": "code",
710
+ "execution_count": 17,
711
+ "metadata": {
712
+ "colab": {
713
+ "base_uri": "https://localhost:8080/",
714
+ "height": 853
715
+ },
716
+ "id": "6ENnsKlD_uOC",
717
+ "outputId": "7b694aa8-e314-4502-a590-3245ebc3e1e0"
718
+ },
719
+ "outputs": [
720
+ {
721
+ "output_type": "stream",
722
+ "name": "stdout",
723
+ "text": [
724
+ "Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().\n",
725
+ "Note: opening Chrome Inspector may crash demo inside Colab notebooks.\n",
726
+ "\n",
727
+ "To create a public link, set `share=True` in `launch()`.\n"
728
+ ]
729
+ },
730
+ {
731
+ "output_type": "display_data",
732
+ "data": {
733
+ "text/plain": [
734
+ "<IPython.core.display.Javascript object>"
735
+ ],
736
+ "application/javascript": [
737
+ "(async (port, path, width, height, cache, element) => {\n",
738
+ " if (!google.colab.kernel.accessAllowed && !cache) {\n",
739
+ " return;\n",
740
+ " }\n",
741
+ " element.appendChild(document.createTextNode(''));\n",
742
+ " const url = await google.colab.kernel.proxyPort(port, {cache});\n",
743
+ "\n",
744
+ " const external_link = document.createElement('div');\n",
745
+ " external_link.innerHTML = `\n",
746
+ " <div style=\"font-family: monospace; margin-bottom: 0.5rem\">\n",
747
+ " Running on <a href=${new URL(path, url).toString()} target=\"_blank\">\n",
748
+ " https://localhost:${port}${path}\n",
749
+ " </a>\n",
750
+ " </div>\n",
751
+ " `;\n",
752
+ " element.appendChild(external_link);\n",
753
+ "\n",
754
+ " const iframe = document.createElement('iframe');\n",
755
+ " iframe.src = new URL(path, url).toString();\n",
756
+ " iframe.height = height;\n",
757
+ " iframe.allow = \"autoplay; camera; microphone; clipboard-read; clipboard-write;\"\n",
758
+ " iframe.width = width;\n",
759
+ " iframe.style.border = 0;\n",
760
+ " element.appendChild(iframe);\n",
761
+ " })(7860, \"/\", \"100%\", 500, false, window.element)"
762
+ ]
763
+ },
764
+ "metadata": {}
765
+ },
766
+ {
767
+ "output_type": "stream",
768
+ "name": "stderr",
769
+ "text": [
770
+ "/usr/local/lib/python3.10/dist-packages/gradio/processing_utils.py:188: UserWarning: Trying to convert audio automatically from int32 to 16-bit int format.\n",
771
+ " warnings.warn(warning.format(data.dtype))\n",
772
+ "/usr/local/lib/python3.10/dist-packages/gradio/processing_utils.py:188: UserWarning: Trying to convert audio automatically from int32 to 16-bit int format.\n",
773
+ " warnings.warn(warning.format(data.dtype))\n",
774
+ "/usr/local/lib/python3.10/dist-packages/gradio/processing_utils.py:188: UserWarning: Trying to convert audio automatically from int32 to 16-bit int format.\n",
775
+ " warnings.warn(warning.format(data.dtype))\n",
776
+ "/usr/local/lib/python3.10/dist-packages/gradio/processing_utils.py:188: UserWarning: Trying to convert audio automatically from int32 to 16-bit int format.\n",
777
+ " warnings.warn(warning.format(data.dtype))\n",
778
+ "/usr/local/lib/python3.10/dist-packages/gradio/processing_utils.py:188: UserWarning: Trying to convert audio automatically from int32 to 16-bit int format.\n",
779
+ " warnings.warn(warning.format(data.dtype))\n"
780
+ ]
781
+ },
782
+ {
783
+ "output_type": "stream",
784
+ "name": "stdout",
785
+ "text": [
786
+ "Keyboard interruption in main thread... closing server.\n"
787
+ ]
788
+ },
789
+ {
790
+ "output_type": "execute_result",
791
+ "data": {
792
+ "text/plain": [
793
+ "'\\nWhat Are the Different Types of Machine Learning?\\nHow Do You Handle Missing or Corrupted Data in a Dataset?\\nHow Can You Choose a Classifier Based on a Training Set Data Size?\\nExplain the Confusion Matrix with Respect to Machine Learning Algorithms.\\nWhat Are the Differences Between Machine Learning and Deep Learning\\n'"
794
+ ],
795
+ "application/vnd.google.colaboratory.intrinsic+json": {
796
+ "type": "string"
797
+ }
798
+ },
799
+ "metadata": {},
800
+ "execution_count": 17
801
+ }
802
+ ],
803
+ "source": [
804
+ "with gr.Blocks(theme=gr.themes.Monochrome()) as demo:\n",
805
+ " gr.Markdown(\"# Oral Exam App\")\n",
806
+ " gr.Markdown(\"## OpenAI API key\")\n",
807
+ " with gr.Box():\n",
808
+ " gr.HTML(\"\"\"Embed your OpenAI API key below; if you haven't created one already, visit\n",
809
+ " platform.openai.com/account/api-keys\n",
810
+ " to sign up for an account and get your personal API key\"\"\",\n",
811
+ " elem_classes=\"textbox_label\")\n",
812
+ " input = gr.Textbox(show_label=False, type=\"password\", container=False,\n",
813
+ " placeholder=\"●●●●●●●●●●●●●●●●●\")\n",
814
+ " input.change(fn=embed_key, inputs=input, outputs=None)\n",
815
+ "\n",
816
+ " with gr.Blocks():\n",
817
+ " #########################\n",
818
+ " #########Context#########\n",
819
+ " #########################\n",
820
+ " with gr.Accordion(\"Context section\"):\n",
821
+ " ### Should also allow vector stores\n",
822
+ " gr.Markdown(\"## Please upload the context document(s) for Oral exam\")\n",
823
+ " context_input = gr.File(label=\"Click to upload context file\",\n",
824
+ " file_count=\"multiple\",\n",
825
+ " file_types=[\".txt\", \".docx\", \".pdf\"])\n",
826
+ " outputs_context=gr.Textbox(label=\"Context\")\n",
827
+ " context_input.change(fn=process_file, inputs=context_input, outputs=outputs_context)\n",
828
+ " # upload_button = gr.Button(value=\"Show context\")\n",
829
+ " # upload_button.click(process_file, context_input, outputs_context)\n",
830
+ "\n",
831
+ " with gr.Blocks():\n",
832
+ " gr.Markdown(\"\"\"\n",
833
+ " ## Generate a Premade Prompt\n",
834
+ " Select your type and number of desired questions. Click \"Generate Prompt\" to get your premade prompt,\n",
835
+ " and then \"Insert Prompt into Chat\" to copy the text into the chat interface below. \\\n",
836
+ " You can also copy the prompt using the icon in the upper right corner and paste directly into the input box when interacting with the model.\n",
837
+ " \"\"\")\n",
838
+ " with gr.Row():\n",
839
+ " with gr.Column():\n",
840
+ " question_type = gr.Dropdown([\"Multiple Choice\", \"True or False\", \"Short Answer\", \"Fill in the Blank\", \"Random\"], label=\"Question Type\")\n",
841
+ " number_of_questions = gr.Textbox(label=\"Enter desired number of questions\")\n",
842
+ " sa_desired_length = gr.Dropdown([\"1-2\", \"3-4\", \"5-6\", \"6 or more\"], label = \"For short answer questions only, choose the desired sentence length for answers. The default value is 1-2 sentences.\")\n",
843
+ " with gr.Column():\n",
844
+ " prompt_button = gr.Button(\"Generate Prompt\")\n",
845
+ " premade_prompt_output = gr.Textbox(label=\"Generated prompt (save or copy)\", show_copy_button=True)\n",
846
+ " prompt_button.click(prompt_select,\n",
847
+ " inputs=[question_type, number_of_questions, sa_desired_length],\n",
848
+ " outputs=premade_prompt_output)\n",
849
+ "\n",
850
+ " #########################\n",
851
+ " #######Main Audio########\n",
852
+ " #########################\n",
853
+ " with gr.Accordion(\"Main audio section\"):\n",
854
+ " gr.Markdown(\"## Upload your audio file or start recording\")\n",
855
+ "\n",
856
+ " with gr.Column():\n",
857
+ " with gr.Row():\n",
858
+ " file_input = gr.Audio(label=\"Upload Audio\", source=\"upload\", type=\"filepath\")\n",
859
+ " record_inputs = gr.Audio(label=\"Record Audio\", source=\"microphone\", type=\"filepath\")\n",
860
+ "\n",
861
+ " gr.Markdown(\"## Transcribe the audio uploaded or recorded\")\n",
862
+ " outputs_transcribe=gr.Textbox(label=\"Transcription\")\n",
863
+ "\n",
864
+ " file_input.change(fn=transcribe, inputs=file_input, outputs=outputs_transcribe)\n",
865
+ " record_inputs.change(fn=transcribe, inputs=record_inputs, outputs=outputs_transcribe)\n",
866
+ "\n",
867
+ " # #########################\n",
868
+ " # ###Question Generation###\n",
869
+ " # #########################\n",
870
+ " # with gr.Accordion(\"Question section\"):\n",
871
+ " # gr.Markdown(\"## Questions\")\n",
872
+ " # with gr.Column():\n",
873
+ " # outputs_qa=gr.Textbox(label=\"Generate questions or Use your own questions\")\n",
874
+ " # btn3 = gr.Button(value=\"Generate questions\")\n",
875
+ " # btn3.click(generate_questions, inputs=context_input, outputs=outputs_qa)\n",
876
+ "\n",
877
+ "\n",
878
+ " ########################\n",
879
+ " ##Question Generation###\n",
880
+ " ########################\n",
881
+ " with gr.Accordion(\"Question section\"):\n",
882
+ " gr.Markdown(\"## Questions\")\n",
883
+ " with gr.Row():\n",
884
+ " with gr.Column():\n",
885
+ " outputs_qa=gr.Textbox(label=\"Generate questions or Use your own questions\")\n",
886
+ " btn1 = gr.Button(value=\"Generate questions\")\n",
887
+ " btn1.click(generate_questions, inputs=[outputs_context, premade_prompt_output], outputs=outputs_qa)\n",
888
+ "\n",
889
+ " # with gr.Column():\n",
890
+ " # submit_question=gr.Textbox(label=\"Use existing questions\")\n",
891
+ " # btn4 = gr.Button(value=\"Use these questions\")\n",
892
+ " # btn4.click(use_this_question, inputs=outputs_transcribe, outputs=None)\n",
893
+ "\n",
894
+ "\n",
895
+ " #########################\n",
896
+ " #######Instruction#######\n",
897
+ " #########################\n",
898
+ " instruction_qa_input = gr.File(label=\"Click to upload instruction file\",\n",
899
+ " file_count=\"multiple\",\n",
900
+ " file_types=[\".txt\", \".docx\", \".pdf\"])\n",
901
+ " instruction_qa=gr.Textbox(label=\"Or please enter the instruction for question/answering section\")\n",
902
+ " instruction_qa.change(fn=process_file, inputs=context_input, outputs=outputs_context)\n",
903
+ "\n",
904
+ "\n",
905
+ " #########################\n",
906
+ " #########Audio QA########\n",
907
+ " #########################\n",
908
+ " with gr.Accordion(\"Audio QA section\"):\n",
909
+ " gr.Markdown(\"## Question answering\")\n",
910
+ " gr.Markdown(\"### When you are ready to answer questions, press the 'I am ready' button\")\n",
911
+ " ##### This may be iterative\n",
912
+ " chatbot = gr.Chatbot([],\n",
913
+ " elem_id=\"chatbot\",\n",
914
+ " height=300)\n",
915
+ " state = gr.State()\n",
916
+ " message = gr.Textbox(show_label=False,\n",
917
+ " placeholder=\"Your answer will be transcribed here\",\n",
918
+ " container=False)\n",
919
+ " ready_button = gr.Button(value=\"I am ready\")\n",
920
+ " ready_button.click(message_and_history, inputs=[message, instruction_qa, outputs_qa, state], outputs=[chatbot, state])\n",
921
+ "\n",
922
+ " hidden = gr.Textbox(visible = False)\n",
923
+ " btn_record = gr.Audio(label=\"Record Audio\", source=\"microphone\", type=\"filepath\")\n",
924
+ " btn_record.change(fn=transcribe, inputs=btn_record, outputs=message)\n",
925
+ " btn_record.clear(use_these_questions, inputs = hidden, outputs = message)\n",
926
+ "\n",
927
+ " submit = gr.Button(\"Submit\")\n",
928
+ " submit.click(message_and_history,\n",
929
+ " inputs=[message, instruction_qa, outputs_qa, state],\n",
930
+ " outputs=[chatbot, state])\n",
931
+ "\n",
932
+ " message_records = gr.Textbox(show_label=False,\n",
933
+ " container=False)\n",
934
+ " show_records = gr.Button(\"Show QA history\")\n",
935
+ " show_records.click(use_these_questions,\n",
936
+ " inputs=state,\n",
937
+ " outputs=message_records)\n",
938
+ "\n",
939
+ " #########################\n",
940
+ " #######Evaluation########\n",
941
+ " #########################\n",
942
+ " with gr.Accordion(\"Evaluation section\"):\n",
943
+ " gr.Markdown(\"## Evaluation\")\n",
944
+ " with gr.Tab(\"General evalution\"):\n",
945
+ " evalution=gr.Textbox(label=\"AI Evaluation\")\n",
946
+ " btn5 = gr.Button(value=\"Evaluate\")\n",
947
+ " btn5.click(ai_evaluate, inputs=[outputs_context, outputs_transcribe, message_records, instruction_qa], outputs=evalution)\n",
948
+ " with gr.Tab(\"Quantitative evalution\"):\n",
949
+ " table_output = gr.Dataframe(label = \"Some kind of evaluation metrics?\")\n",
950
+ " btn6 = gr.Button(value=\"Evaluate\")\n",
951
+ " # btn6.click(ai_evaluate, inputs=[outputs_context, message_records, outputs_qa], outputs=table_output)\n",
952
+ "\n",
953
+ " # demo.launch()\n",
954
+ " # demo.launch(share=True)\n",
955
+ " demo.launch(debug=True)\n",
956
+ "\n",
957
+ "'''\n",
958
+ "What Are the Different Types of Machine Learning?\n",
959
+ "How Do You Handle Missing or Corrupted Data in a Dataset?\n",
960
+ "How Can You Choose a Classifier Based on a Training Set Data Size?\n",
961
+ "Explain the Confusion Matrix with Respect to Machine Learning Algorithms.\n",
962
+ "What Are the Differences Between Machine Learning and Deep Learning\n",
963
+ "'''"
964
+ ]
965
+ },
966
+ {
967
+ "cell_type": "markdown",
968
+ "metadata": {
969
+ "id": "g2EVIogW69Fd"
970
+ },
971
+ "source": [
972
+ "## What's left\n",
973
+ "- vector store (link) upload\n",
974
+ "- how to not show the warning when transcribing\n",
975
+ "- better prompt for evaluation\n",
976
+ "- try ChatInterface of Gradio"
977
+ ]
978
+ },
979
+ {
980
+ "cell_type": "code",
981
+ "source": [],
982
+ "metadata": {
983
+ "id": "-YwOAtNANrx_"
984
+ },
985
+ "execution_count": null,
986
+ "outputs": []
987
+ }
988
+ ],
989
+ "metadata": {
990
+ "colab": {
991
+ "provenance": [],
992
+ "include_colab_link": true
993
+ },
994
+ "kernelspec": {
995
+ "display_name": "Python 3",
996
+ "name": "python3"
997
+ },
998
+ "language_info": {
999
+ "name": "python"
1000
+ }
1001
+ },
1002
+ "nbformat": 4,
1003
+ "nbformat_minor": 0
1004
+ }
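Note: the `generate_questions` and `ai_evaluate` helpers wired into the UI above are defined earlier in this notebook. A minimal sketch of how an evaluation call could be assembled from the `evaluate_template` defined above is given here; the function name `ai_evaluate_sketch` and the model settings are assumptions for illustration, not the notebook's actual implementation.

from langchain.chat_models import ChatOpenAI

def ai_evaluate_sketch(context, transcript, qa_history, instructions=""):
    # Fill in the evaluation prompt defined above and send it to the chat model (assumed settings)
    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
    messages = evaluate_template.format_messages(context=context, transcript=transcript,
                                                 QA=qa_history, instructions=instructions)
    return llm(messages).content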
lo-achievement/ai_classroom_suite/IOHelperUtilities.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/helper_utilities.ipynb.
2
+
3
+ # %% auto 0
4
+ __all__ = ['check_is_colab', 'MultiFileChooser', 'setup_drives']
5
+
6
+ # %% ../nbs/helper_utilities.ipynb 3
7
+ import ipywidgets as widgets
8
+ from IPython.display import display, clear_output
9
+ from functools import partial
10
+ from ipyfilechooser import FileChooser
11
+ import os
12
+
13
+ # %% ../nbs/helper_utilities.ipynb 4
14
+ def check_is_colab():
15
+ """
16
+ Check if the current environment is Google Colab.
17
+ """
18
+ try:
19
+ import google.colab
20
+ return True
21
+ except:
22
+ return False
23
+
24
+ # %% ../nbs/helper_utilities.ipynb 7
25
+ class MultiFileChooser:
26
+ def __init__(self):
27
+ self.fc = FileChooser('.')
28
+ self.fc.title = "Use the following file chooser to add each file individually.\n You can remove files by clicking the remove button."
29
+ self.fc.use_dir_icons = True
30
+ self.fc.show_only_dirs = False
31
+ self.selected_files = []
32
+
33
+ self.fc.register_callback(self.file_selected)
34
+
35
+ self.output = widgets.Output()
36
+
37
+ def file_selected(self, chooser):
38
+ if self.fc.selected is not None and self.fc.selected not in self.selected_files:
39
+ self.selected_files.append(self.fc.selected)
40
+ self.update_display()
41
+
42
+ def update_display(self):
43
+ with self.output:
44
+ clear_output()
45
+ for this_file in self.selected_files:
46
+ remove_button = widgets.Button(description="Remove", tooltip="Remove this file")
47
+ remove_button.on_click(partial(self.remove_file, this_file=this_file))
48
+ display(widgets.HBox([widgets.Label(value=this_file), remove_button]))
49
+
50
+ def remove_file(self, button, this_file):
51
+ if this_file in self.selected_files:
52
+ self.selected_files.remove(this_file)
53
+ self.update_display()
54
+
55
+ def display(self):
56
+ display(self.fc, self.output)
57
+
58
+ def get_selected_files(self):
59
+ return self.selected_files
60
+
61
+ # %% ../nbs/helper_utilities.ipynb 12
62
+ def setup_drives(upload_set):
63
+
64
+ upload_set = upload_set.lower()
65
+ uploaded = None
66
+
67
+ # allow them to mount the drive if they chose Google Colab.
68
+ if upload_set == 'google drive':
69
+ if check_is_colab():
70
+ from google.colab import drive
71
+ drive.mount('/content/drive')
72
+ else:
73
+ raise ValueError("It looks like you're not on Google Colab. Google Drive mounting is currently only implemented for Google Colab.")
74
+
75
+ # Everything else means that they'll need to use a file chooser (including Google Drive)
76
+ if check_is_colab():
77
+ from google.colab import files
78
+ uploaded = files.upload()
79
+ else:
80
+ # Create file chooser and interact
81
+ mfc = MultiFileChooser()
82
+ mfc.display()
83
+ uploaded = mfc.get_selected_files()
84
+
85
+ return uploaded
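A minimal usage sketch for the helpers above, assuming a local Jupyter session rather than Colab (any mode string other than 'google drive' falls through to the file chooser):

from ai_classroom_suite.IOHelperUtilities import setup_drives

# Outside Colab this displays the MultiFileChooser widget; the returned list fills in as files are selected
selected_files = setup_drives('local')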
lo-achievement/ai_classroom_suite/MediaVectorStores.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/media_stores.ipynb.
2
+
3
+ # %% auto 0
4
+ __all__ = ['rawtext_to_doc_split', 'files_to_text', 'youtube_to_text', 'save_text', 'get_youtube_transcript',
5
+ 'website_to_text_web', 'website_to_text_unstructured', 'get_document_segments', 'create_local_vector_store']
6
+
7
+ # %% ../nbs/media_stores.ipynb 3
8
+ # import libraries here
9
+ import os
10
+ import itertools
11
+
12
+ from langchain.embeddings import OpenAIEmbeddings
13
+
14
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
15
+ from langchain.document_loaders.unstructured import UnstructuredFileLoader
16
+ from langchain.document_loaders.generic import GenericLoader
17
+ from langchain.document_loaders.parsers import OpenAIWhisperParser
18
+ from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader
19
+ from langchain.document_loaders import WebBaseLoader, UnstructuredURLLoader
20
+ from langchain.docstore.document import Document
21
+
22
+ from langchain.vectorstores import Chroma
23
+ from langchain.chains import RetrievalQAWithSourcesChain
24
+
25
+ # %% ../nbs/media_stores.ipynb 8
26
+ def rawtext_to_doc_split(text, chunk_size=1500, chunk_overlap=150):
27
+
28
+ # Quick type checking
29
+ if not isinstance(text, list):
30
+ text = [text]
31
+
32
+ # Create splitter
33
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size,
34
+ chunk_overlap=chunk_overlap,
35
+ add_start_index = True)
36
+
37
+ #Split into docs segments
38
+ if isinstance(text[0], Document):
39
+ doc_segments = text_splitter.split_documents(text)
40
+ else:
41
+ doc_segments = text_splitter.split_documents(text_splitter.create_documents(text))
42
+
43
+ # Make into one big list
44
+ doc_segments = list(itertools.chain(*doc_segments)) if isinstance(doc_segments[0], list) else doc_segments
45
+
46
+ return doc_segments
47
+
48
+ # %% ../nbs/media_stores.ipynb 16
49
+ ## A single File
50
+ def _file_to_text(single_file, chunk_size = 1000, chunk_overlap=150):
51
+
52
+ # Create loader and get segments
53
+ loader = UnstructuredFileLoader(single_file)
54
+ doc_segments = loader.load_and_split(RecursiveCharacterTextSplitter(chunk_size=chunk_size,
55
+ chunk_overlap=chunk_overlap,
56
+ add_start_index=True))
57
+ return doc_segments
58
+
59
+
60
+ ## Multiple files
61
+ def files_to_text(files_list, chunk_size=1000, chunk_overlap=150):
62
+
63
+ # Quick type checking
64
+ if not isinstance(files_list, list):
65
+ files_list = [files_list]
66
+
67
+ # Workaround: UnstructuredFileLoader accepts a list of files but does not split them correctly yet, so each file is loaded individually
68
+ all_segments = [_file_to_text(single_file, chunk_size=chunk_size, chunk_overlap=chunk_overlap) for single_file in files_list]
69
+ all_segments = list(itertools.chain(*all_segments)) if isinstance(all_segments[0], list) else all_segments
70
+
71
+ return all_segments
72
+
73
+ # %% ../nbs/media_stores.ipynb 20
74
+ def youtube_to_text(urls, save_dir = "content"):
75
+ # Transcribe the videos to text
76
+ # save_dir: directory to save audio files
77
+
78
+ if not isinstance(urls, list):
79
+ urls = [urls]
80
+
81
+ youtube_loader = GenericLoader(YoutubeAudioLoader(urls, save_dir), OpenAIWhisperParser())
82
+ youtube_docs = youtube_loader.load()
83
+
84
+ return youtube_docs
85
+
86
+ # %% ../nbs/media_stores.ipynb 24
87
+ def save_text(text, text_name = None):
88
+ if not text_name:
89
+ text_name = text[:20]
90
+ text_path = os.path.join("/content",text_name+".txt")
91
+
92
+ with open(text_path, "x") as f:
93
+ f.write(text)
94
+ # Return the location at which the transcript is saved
95
+ return text_path
96
+
97
+ # %% ../nbs/media_stores.ipynb 25
98
+ def get_youtube_transcript(yt_url, save_transcript = False, temp_audio_dir = "sample_data"):
99
+ # Transcribe the videos to text and save to file in /content
100
+ # save_dir: directory to save audio files
101
+
102
+ youtube_docs = youtube_to_text(yt_url, save_dir = temp_audio_dir)
103
+
104
+ # Combine doc
105
+ combined_docs = [doc.page_content for doc in youtube_docs]
106
+ combined_text = " ".join(combined_docs)
107
+
108
+ # Save text to file
109
+ video_path = youtube_docs[0].metadata["source"]
110
+ youtube_name = os.path.splitext(os.path.basename(video_path))[0]
111
+
112
+ save_path = None
113
+ if save_transcript:
114
+ save_path = save_text(combined_text, youtube_name)
115
+
116
+ return youtube_docs, save_path
117
+
118
+ # %% ../nbs/media_stores.ipynb 27
119
+ def website_to_text_web(url, chunk_size = 1500, chunk_overlap=100):
120
+
121
+ # Url can be a single string or list
122
+ website_loader = WebBaseLoader(url)
123
+ website_raw = website_loader.load()
124
+
125
+ website_data = rawtext_to_doc_split(website_raw, chunk_size = chunk_size, chunk_overlap=chunk_overlap)
126
+
127
+ # Combine doc
128
+ return website_data
129
+
130
+ # %% ../nbs/media_stores.ipynb 33
131
+ def website_to_text_unstructured(web_urls, chunk_size = 1500, chunk_overlap=100):
132
+
133
+ # Make sure it's a list
134
+ if not isinstance(web_urls, list):
135
+ web_urls = [web_urls]
136
+
137
+ # Url can be a single string or list
138
+ website_loader = UnstructuredURLLoader(web_urls)
139
+ website_raw = website_loader.load()
140
+
141
+ website_data = rawtext_to_doc_split(website_raw, chunk_size = chunk_size, chunk_overlap=chunk_overlap)
142
+
143
+ # Return individual docs or list
144
+ return website_data
145
+
146
+ # %% ../nbs/media_stores.ipynb 45
147
+ def get_document_segments(context_info, data_type, chunk_size = 1500, chunk_overlap=100):
148
+
149
+ load_fcn = None
150
+ addtnl_params = {'chunk_size': chunk_size, 'chunk_overlap': chunk_overlap}
151
+
152
+ # Define function use to do the loading
153
+ if data_type == 'text':
154
+ load_fcn = rawtext_to_doc_split
155
+ elif data_type == 'web_page':
156
+ load_fcn = website_to_text_unstructured
157
+ elif data_type == 'youtube_video':
158
+ load_fcn = youtube_to_text
159
+ else:
160
+ load_fcn = files_to_text
161
+
162
+ # Get the document segments
163
+ doc_segments = load_fcn(context_info, **addtnl_params)
164
+
165
+ return doc_segments
166
+
167
+ # %% ../nbs/media_stores.ipynb 47
168
+ def create_local_vector_store(document_segments, **retriever_kwargs):
169
+ embeddings = OpenAIEmbeddings()
170
+ db = Chroma.from_documents(document_segments, embeddings)
171
+ retriever = db.as_retriever(**retriever_kwargs)
172
+
173
+ return db, retriever
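A minimal sketch of chaining these helpers to build a retriever over a local file; the file path and retriever settings are assumptions, and OPENAI_API_KEY must be set for the embeddings:

from ai_classroom_suite.MediaVectorStores import get_document_segments, create_local_vector_store

# Split an (assumed) local file into overlapping segments, embed them with OpenAI, and build a retriever
segments = get_document_segments(['notes.txt'], data_type='file', chunk_size=1000, chunk_overlap=150)
db, retriever = create_local_vector_store(segments, search_kwargs={'k': 4})
relevant_docs = retriever.get_relevant_documents('What are the main topics covered?')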
lo-achievement/ai_classroom_suite/PromptInteractionBase.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/prompt_interaction_base.ipynb.
2
+
3
+ # %% auto 0
4
+ __all__ = ['SYSTEM_TUTOR_TEMPLATE', 'HUMAN_RESPONSE_TEMPLATE', 'HUMAN_RETRIEVER_RESPONSE_TEMPLATE', 'DEFAULT_ASSESSMENT_MSG',
5
+ 'DEFAULT_LEARNING_OBJS_MSG', 'DEFAULT_CONDENSE_PROMPT_TEMPLATE', 'DEFAULT_QUESTION_PROMPT_TEMPLATE',
6
+ 'DEFAULT_COMBINE_PROMPT_TEMPLATE', 'create_model', 'set_openai_key', 'create_base_tutoring_prompt',
7
+ 'get_tutoring_prompt', 'get_tutoring_answer', 'create_tutor_mdl_chain']
8
+
9
+ # %% ../nbs/prompt_interaction_base.ipynb 3
10
+ from langchain.chat_models import ChatOpenAI
11
+ from langchain.llms import OpenAI
12
+
13
+ from langchain import PromptTemplate
14
+ from langchain.prompts import ChatPromptTemplate, PromptTemplate
15
+ from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate
16
+ from langchain.chains import LLMChain, ConversationalRetrievalChain, RetrievalQAWithSourcesChain
17
+ from langchain.chains.base import Chain
18
+
19
+ from getpass import getpass
20
+
21
+ import os
22
+
23
+ # %% ../nbs/prompt_interaction_base.ipynb 5
24
+ def create_model(openai_mdl='gpt-3.5-turbo-16k', temperature=0.1, **chatopenai_kwargs):
25
+ llm = ChatOpenAI(model_name = openai_mdl, temperature=temperature, **chatopenai_kwargs)
26
+
27
+ return llm
28
+
29
+ # %% ../nbs/prompt_interaction_base.ipynb 6
30
+ def set_openai_key():
31
+ openai_api_key = getpass()
32
+ os.environ["OPENAI_API_KEY"] = openai_api_key
33
+
34
+ return
35
+
36
+ # %% ../nbs/prompt_interaction_base.ipynb 10
37
+ # Create system prompt template
38
+ SYSTEM_TUTOR_TEMPLATE = ("You are a world-class tutor helping students to perform better on oral and written exams though interactive experiences. " +
39
+ "When assessing and evaluating students, you always ask one question at a time, and wait for the student's response before " +
40
+ "providing them with feedback. Asking one question at a time, waiting for the student's response, and then commenting " +
41
+ "on the strengths and weaknesses of their responses (when appropriate) is what makes you such a sought-after, world-class tutor.")
42
+
43
+ # Create a human response template
44
+ HUMAN_RESPONSE_TEMPLATE = ("I'm trying to better understand the text provided below. {assessment_request} The learning objectives to be assessed are: " +
45
+ "{learning_objectives}. Although I may request more than one assessment question, you should " +
46
+ "only provide ONE question in you initial response. Do not include the answer in your response. " +
47
+ "If I get an answer wrong, provide me with an explanation of why it was incorrect, and then give me additional " +
48
+ "chances to respond until I get the correct choice. Explain why the correct choice is right. " +
49
+ "The text that you will base your questions on is as follows: {context}.")
50
+
51
+ HUMAN_RETRIEVER_RESPONSE_TEMPLATE = ("I want to master the topics based on the excerpts of the text below. Given the following extracted text from long documents, {assessment_request} The learning objectives to be assessed are: " +
52
+ "{learning_objectives}. Although I may request more than one assessment question, you should " +
53
+ "only provide ONE question in you initial response. Do not include the answer in your response. " +
54
+ "If I get an answer wrong, provide me with an explanation of why it was incorrect, and then give me additional " +
55
+ "chances to respond until I get the correct choice. Explain why the correct choice is right. " +
56
+ "The extracted text from long documents are as follows: {summaries}.")
57
+
58
+ def create_base_tutoring_prompt(system_prompt=None, human_prompt=None):
59
+
60
+ #setup defaults using defined values
61
+ if system_prompt == None:
62
+ system_prompt = PromptTemplate(template = SYSTEM_TUTOR_TEMPLATE,
63
+ input_variables = [])
64
+
65
+ if human_prompt==None:
66
+ human_prompt = PromptTemplate(template = HUMAN_RESPONSE_TEMPLATE,
67
+ input_variables=['assessment_request', 'learning_objectives', 'context'])
68
+
69
+ # Create prompt messages
70
+ system_tutor_msg = SystemMessagePromptTemplate(prompt=system_prompt)
71
+ human_tutor_msg = HumanMessagePromptTemplate(prompt= human_prompt)
72
+
73
+ # Create ChatPromptTemplate
74
+ chat_prompt = ChatPromptTemplate.from_messages([system_tutor_msg, human_tutor_msg])
75
+
76
+ return chat_prompt
77
+
78
+ # %% ../nbs/prompt_interaction_base.ipynb 14
79
+ DEFAULT_ASSESSMENT_MSG = 'Please design a 5 question short answer quiz about the provided text.'
80
+ DEFAULT_LEARNING_OBJS_MSG = 'Identify and comprehend the important topics and underlying messages and connections within the text'
81
+
82
+ def get_tutoring_prompt(context, chat_template=None, assessment_request = None, learning_objectives = None, **kwargs):
83
+
84
+ # set defaults
85
+ if chat_template is None:
86
+ chat_template = create_base_tutoring_prompt()
87
+ else:
88
+ if not all([prompt_var in chat_template.input_variables
89
+ for prompt_var in ['context', 'assessment_request', 'learning_objectives']]):
90
+ raise KeyError('''It looks like you may have a custom chat_template. Either include context, assessment_request, and learning objectives
91
+ as input variables or create your own tutoring prompt.''')
92
+
93
+ if assessment_request is None:
94
+ assessment_request = DEFAULT_ASSESSMENT_MSG
95
+
96
+ if learning_objectives is None:
97
+ learning_objectives = DEFAULT_LEARNING_OBJS_MSG
98
+
99
+ # compose final prompt
100
+ tutoring_prompt = chat_template.format_prompt(context=context,
101
+ assessment_request = assessment_request,
102
+ learning_objectives = learning_objectives,
103
+ **kwargs)
104
+
105
+ return tutoring_prompt
106
+
107
+
108
+ # %% ../nbs/prompt_interaction_base.ipynb 18
109
+ def get_tutoring_answer(context, tutor_mdl, chat_template=None, assessment_request=None, learning_objectives=None, return_dict=False, call_kwargs={}, input_kwargs={}):
110
+
111
+ # Get answer from chat
112
+
113
+ # set defaults
114
+ if assessment_request is None:
115
+ assessment_request = DEFAULT_ASSESSMENT_MSG
116
+ if learning_objectives is None:
117
+ learning_objectives = DEFAULT_LEARNING_OBJS_MSG
118
+
119
+ common_inputs = {'assessment_request':assessment_request, 'learning_objectives':learning_objectives}
120
+
121
+ # get answer based on interaction type
122
+ if isinstance(tutor_mdl, ChatOpenAI):
123
+ human_ask_prompt = get_tutoring_prompt(context, chat_template, assessment_request, learning_objectives)
124
+ tutor_answer = tutor_mdl(human_ask_prompt.to_messages())
125
+
126
+ if not return_dict:
127
+ final_answer = tutor_answer.content
128
+
129
+ elif isinstance(tutor_mdl, Chain):
130
+ if isinstance(tutor_mdl, RetrievalQAWithSourcesChain):
131
+ if 'question' not in input_kwargs.keys():
132
+ common_inputs['question'] = assessment_request
133
+ final_inputs = {**common_inputs, **input_kwargs}
134
+ else:
135
+ common_inputs['context'] = context
136
+ final_inputs = {**common_inputs, **input_kwargs}
137
+
138
+ # get answer
139
+ tutor_answer = tutor_mdl(final_inputs, **call_kwargs)
140
+ final_answer = tutor_answer
141
+
142
+ if not return_dict:
143
+ final_answer = final_answer['answer']
144
+
145
+ else:
146
+ raise NotImplementedError(f"tutor_mdl of type {type(tutor_mdl)} is not supported.")
147
+
148
+ return final_answer
149
+
150
+ # %% ../nbs/prompt_interaction_base.ipynb 19
151
+ DEFAULT_CONDENSE_PROMPT_TEMPLATE = ("Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, " +
152
+ "in its original language.\n\nChat History:\n{chat_history}\nFollow Up Input: {question}\nStandalone question:")
153
+
154
+ DEFAULT_QUESTION_PROMPT_TEMPLATE = ("Use the following portion of a long document to see if any of the text is relevant to creating a response to the question." +
155
+ "\nReturn any relevant text verbatim.\n{context}\nQuestion: {question}\nRelevant text, if any:")
156
+
157
+ DEFAULT_COMBINE_PROMPT_TEMPLATE = ("Given the following extracted parts of a long document and the given prompt, create a final answer with references ('SOURCES'). "+
158
+ "If you don't have a response, just say that you are unable to come up with a response. "+
159
+ "\nSOURCES:\n\nQUESTION: {question}\n=========\n{summaries}\n=========\nFINAL ANSWER:'")
160
+
161
+ def create_tutor_mdl_chain(kind='llm', mdl=None, prompt_template = None, **kwargs):
162
+
163
+ #Validate parameters
164
+ if mdl is None:
165
+ mdl = create_model()
166
+ kind = kind.lower()
167
+
168
+ #Create model chain
169
+ if kind == 'llm':
170
+ if prompt_template is None:
171
+ prompt_template = create_base_tutoring_prompt()
172
+ mdl_chain = LLMChain(llm=mdl, prompt=prompt_template, **kwargs)
173
+ elif kind == 'conversational':
174
+ if prompt_template is None:
175
+ prompt_template = PromptTemplate.from_template(DEFAULT_CONDENSE_PROMPT_TEMPLATE)
176
+ mdl_chain = ConversationalRetrievalChain.from_llm(mdl, condense_question_prompt = prompt_template, **kwargs)
177
+ elif kind == 'retrieval_qa':
178
+ if prompt_template is None:
179
+
180
+ #Create custom human prompt to take in summaries
181
+ human_prompt = PromptTemplate(template = HUMAN_RETRIEVER_RESPONSE_TEMPLATE,
182
+ input_variables=['assessment_request', 'learning_objectives', 'summaries'])
183
+ prompt_template = create_base_tutoring_prompt(human_prompt=human_prompt)
184
+
185
+ #Create the combination prompt and model
186
+ question_template = PromptTemplate.from_template(DEFAULT_QUESTION_PROMPT_TEMPLATE)
187
+ mdl_chain = RetrievalQAWithSourcesChain.from_llm(llm=mdl, question_prompt=question_template, combine_prompt = prompt_template, **kwargs)
188
+ else:
189
+ raise NotImplementedError(f"Model kind {kind} not implemented")
190
+
191
+ return mdl_chain
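A minimal sketch of the intended call pattern for these helpers; the `retriever` is assumed to come from MediaVectorStores.create_local_vector_store, and exact behavior may vary with the installed langchain version:

from ai_classroom_suite.PromptInteractionBase import set_openai_key, create_model, create_tutor_mdl_chain, get_tutoring_answer

set_openai_key()     # prompts for the OpenAI API key and stores it in the environment
llm = create_model()     # defaults to gpt-3.5-turbo-16k
tutor = create_tutor_mdl_chain(kind='retrieval_qa', mdl=llm, retriever=retriever)
answer = get_tutoring_answer(None, tutor,
                             assessment_request='Please design a 3 question short answer quiz about the provided text.')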
lo-achievement/ai_classroom_suite/SelfStudyPrompts.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/self_study_prompts.ipynb.
2
+
3
+ # %% auto 0
4
+ __all__ = ['MC_QUIZ_DEFAULT', 'SHORT_ANSWER_DEFAULT', 'FILL_BLANK_DEFAULT', 'SEQUENCING_DEFAULT', 'RELATIONSHIP_DEFAULT',
5
+ 'CONCEPTS_DEFAULT', 'REAL_WORLD_EXAMPLE_DEFAULT', 'RANDOMIZED_QUESTIONS_DEFAULT', 'SELF_STUDY_PROMPT_NAMES',
6
+ 'SELF_STUDY_DEFAULTS', 'list_all_self_study_prompt_keys', 'list_all_self_study_prompts',
7
+ 'list_default_self_prompt_varnames', 'print_all_self_study_prompts']
8
+
9
+ # %% ../nbs/self_study_prompts.ipynb 4
10
+ # used for pretty display
11
+ import pandas as pd
12
+
13
+ # %% ../nbs/self_study_prompts.ipynb 5
14
+ MC_QUIZ_DEFAULT = "Please design a 5 question multiple choice quiz about the provided text."
15
+
16
+ SHORT_ANSWER_DEFAULT = ("Please design a 5 question short answer quiz about the provided text. "
17
+ "The question types should be short answer. Expect the correct answers to be a few sentences long.")
18
+
19
+ FILL_BLANK_DEFAULT = """Create a 5 question fill in the blank quiz referencing parts of the provided text.
20
+ The "blank" part of the question should appear as "________". The answers should reflect what word(s) should go in the blank an accurate statement.
21
+ An example is as follows: "The author of the book is ______." The question should be a statement.
22
+ """
23
+
24
+ SEQUENCING_DEFAULT = """Create a 5 question questionnaire that will ask me to recall the steps or sequence of events
25
+ in the provided text."""
26
+
27
+ RELATIONSHIP_DEFAULT = ("Create a 5 question quiz for the student that asks the student to identify relationships between"
28
+ "topics or concepts that are important to understanding this text.")
29
+
30
+ CONCEPTS_DEFAULT = """ Design a 5 question quiz that asks me about definitions or concepts of importance in the provided text."""
31
+
32
+ REAL_WORLD_EXAMPLE_DEFAULT = """Demonstrate how the provided context can be applied to solve a real world problem.
33
+ Ask me questions about how the demonstration you provided relates to solving a real world problem."""
34
+
35
+ RANDOMIZED_QUESTIONS_DEFAULT = """Generate a high-quality assessment consisting of 5 varied questions,
36
+ each of different types (open-ended, multiple choice, short answer, analogies, etc.)"""
37
+
38
+ SELF_STUDY_PROMPT_NAMES = ['MC_QUIZ_DEFAULT',
39
+ 'SHORT_ANSWER_DEFAULT',
40
+ 'FILL_BLANK_DEFAULT',
41
+ 'SEQUENCING_DEFAULT',
42
+ 'RELATIONSHIP_DEFAULT',
43
+ 'CONCEPTS_DEFAULT',
44
+ 'REAL_WORLD_EXAMPLE_DEFAULT',
45
+ 'RANDOMIZED_QUESTIONS_DEFAULT']
46
+
47
+ # %% ../nbs/self_study_prompts.ipynb 7
48
+ # Define self study dictionary for lookup
49
+ SELF_STUDY_DEFAULTS = {'mc': MC_QUIZ_DEFAULT,
50
+ 'short_answer': SHORT_ANSWER_DEFAULT,
51
+ 'fill_blank': FILL_BLANK_DEFAULT,
52
+ 'sequencing': SEQUENCING_DEFAULT,
53
+ 'relationships': RELATIONSHIP_DEFAULT,
54
+ 'concepts': CONCEPTS_DEFAULT,
55
+ 'real_world_example': REAL_WORLD_EXAMPLE_DEFAULT,
56
+ 'randomized_questions': RANDOMIZED_QUESTIONS_DEFAULT
57
+ }
58
+
59
+ # Return list of all self study prompts
60
+ def list_all_self_study_prompt_keys():
61
+ return list(SELF_STUDY_DEFAULTS.keys())
62
+
63
+ def list_all_self_study_prompts():
64
+ return list(SELF_STUDY_DEFAULTS.values())
65
+
66
+ # Return list of all self study variable names
67
+ def list_default_self_prompt_varnames():
68
+ return SELF_STUDY_PROMPT_NAMES
69
+
70
+ # Print as a table
71
+ def print_all_self_study_prompts():
72
+ with pd.option_context('max_colwidth', None):
73
+ display(pd.DataFrame({'SELF_STUDY_DEFAULTS key': list(SELF_STUDY_DEFAULTS.keys()),
74
+ 'Prompt': list(SELF_STUDY_DEFAULTS.values())}))
75
+
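A brief usage sketch of the lookup helpers above:

from ai_classroom_suite.SelfStudyPrompts import SELF_STUDY_DEFAULTS, list_all_self_study_prompt_keys

list_all_self_study_prompt_keys()   # ['mc', 'short_answer', 'fill_blank', ...]
assessment_request = SELF_STUDY_DEFAULTS['short_answer']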
lo-achievement/ai_classroom_suite/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ __version__ = "0.0.1"
lo-achievement/ai_classroom_suite/_modidx.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Autogenerated by nbdev
2
+
3
+ d = { 'settings': { 'branch': 'main',
4
+ 'doc_baseurl': '/lo-achievement',
5
+ 'doc_host': 'https://vanderbilt-data-science.github.io',
6
+ 'git_url': 'https://github.com/vanderbilt-data-science/lo-achievement',
7
+ 'lib_path': 'ai_classroom_suite'},
8
+ 'syms': { 'ai_classroom_suite.IOHelperUtilities': { 'ai_classroom_suite.IOHelperUtilities.MultiFileChooser': ( 'helper_utilities.html#multifilechooser',
9
+ 'ai_classroom_suite/IOHelperUtilities.py'),
10
+ 'ai_classroom_suite.IOHelperUtilities.MultiFileChooser.__init__': ( 'helper_utilities.html#multifilechooser.__init__',
11
+ 'ai_classroom_suite/IOHelperUtilities.py'),
12
+ 'ai_classroom_suite.IOHelperUtilities.MultiFileChooser.display': ( 'helper_utilities.html#multifilechooser.display',
13
+ 'ai_classroom_suite/IOHelperUtilities.py'),
14
+ 'ai_classroom_suite.IOHelperUtilities.MultiFileChooser.file_selected': ( 'helper_utilities.html#multifilechooser.file_selected',
15
+ 'ai_classroom_suite/IOHelperUtilities.py'),
16
+ 'ai_classroom_suite.IOHelperUtilities.MultiFileChooser.get_selected_files': ( 'helper_utilities.html#multifilechooser.get_selected_files',
17
+ 'ai_classroom_suite/IOHelperUtilities.py'),
18
+ 'ai_classroom_suite.IOHelperUtilities.MultiFileChooser.remove_file': ( 'helper_utilities.html#multifilechooser.remove_file',
19
+ 'ai_classroom_suite/IOHelperUtilities.py'),
20
+ 'ai_classroom_suite.IOHelperUtilities.MultiFileChooser.update_display': ( 'helper_utilities.html#multifilechooser.update_display',
21
+ 'ai_classroom_suite/IOHelperUtilities.py'),
22
+ 'ai_classroom_suite.IOHelperUtilities.check_is_colab': ( 'helper_utilities.html#check_is_colab',
23
+ 'ai_classroom_suite/IOHelperUtilities.py'),
24
+ 'ai_classroom_suite.IOHelperUtilities.setup_drives': ( 'helper_utilities.html#setup_drives',
25
+ 'ai_classroom_suite/IOHelperUtilities.py')},
26
+ 'ai_classroom_suite.MediaVectorStores': { 'ai_classroom_suite.MediaVectorStores._file_to_text': ( 'media_stores.html#_file_to_text',
27
+ 'ai_classroom_suite/MediaVectorStores.py'),
28
+ 'ai_classroom_suite.MediaVectorStores.create_local_vector_store': ( 'media_stores.html#create_local_vector_store',
29
+ 'ai_classroom_suite/MediaVectorStores.py'),
30
+ 'ai_classroom_suite.MediaVectorStores.files_to_text': ( 'media_stores.html#files_to_text',
31
+ 'ai_classroom_suite/MediaVectorStores.py'),
32
+ 'ai_classroom_suite.MediaVectorStores.get_document_segments': ( 'media_stores.html#get_document_segments',
33
+ 'ai_classroom_suite/MediaVectorStores.py'),
34
+ 'ai_classroom_suite.MediaVectorStores.get_youtube_transcript': ( 'media_stores.html#get_youtube_transcript',
35
+ 'ai_classroom_suite/MediaVectorStores.py'),
36
+ 'ai_classroom_suite.MediaVectorStores.rawtext_to_doc_split': ( 'media_stores.html#rawtext_to_doc_split',
37
+ 'ai_classroom_suite/MediaVectorStores.py'),
38
+ 'ai_classroom_suite.MediaVectorStores.save_text': ( 'media_stores.html#save_text',
39
+ 'ai_classroom_suite/MediaVectorStores.py'),
40
+ 'ai_classroom_suite.MediaVectorStores.website_to_text_unstructured': ( 'media_stores.html#website_to_text_unstructured',
41
+ 'ai_classroom_suite/MediaVectorStores.py'),
42
+ 'ai_classroom_suite.MediaVectorStores.website_to_text_web': ( 'media_stores.html#website_to_text_web',
43
+ 'ai_classroom_suite/MediaVectorStores.py'),
44
+ 'ai_classroom_suite.MediaVectorStores.youtube_to_text': ( 'media_stores.html#youtube_to_text',
45
+ 'ai_classroom_suite/MediaVectorStores.py')},
46
+ 'ai_classroom_suite.PromptInteractionBase': { 'ai_classroom_suite.PromptInteractionBase.create_base_tutoring_prompt': ( 'prompt_interaction_base.html#create_base_tutoring_prompt',
47
+ 'ai_classroom_suite/PromptInteractionBase.py'),
48
+ 'ai_classroom_suite.PromptInteractionBase.create_model': ( 'prompt_interaction_base.html#create_model',
49
+ 'ai_classroom_suite/PromptInteractionBase.py'),
50
+ 'ai_classroom_suite.PromptInteractionBase.create_tutor_mdl_chain': ( 'prompt_interaction_base.html#create_tutor_mdl_chain',
51
+ 'ai_classroom_suite/PromptInteractionBase.py'),
52
+ 'ai_classroom_suite.PromptInteractionBase.get_tutoring_answer': ( 'prompt_interaction_base.html#get_tutoring_answer',
53
+ 'ai_classroom_suite/PromptInteractionBase.py'),
54
+ 'ai_classroom_suite.PromptInteractionBase.get_tutoring_prompt': ( 'prompt_interaction_base.html#get_tutoring_prompt',
55
+ 'ai_classroom_suite/PromptInteractionBase.py'),
56
+ 'ai_classroom_suite.PromptInteractionBase.set_openai_key': ( 'prompt_interaction_base.html#set_openai_key',
57
+ 'ai_classroom_suite/PromptInteractionBase.py')},
58
+ 'ai_classroom_suite.SelfStudyPrompts': { 'ai_classroom_suite.SelfStudyPrompts.list_all_self_study_prompt_keys': ( 'self_study_prompts.html#list_all_self_study_prompt_keys',
59
+ 'ai_classroom_suite/SelfStudyPrompts.py'),
60
+ 'ai_classroom_suite.SelfStudyPrompts.list_all_self_study_prompts': ( 'self_study_prompts.html#list_all_self_study_prompts',
61
+ 'ai_classroom_suite/SelfStudyPrompts.py'),
62
+ 'ai_classroom_suite.SelfStudyPrompts.list_default_self_prompt_varnames': ( 'self_study_prompts.html#list_default_self_prompt_varnames',
63
+ 'ai_classroom_suite/SelfStudyPrompts.py'),
64
+ 'ai_classroom_suite.SelfStudyPrompts.print_all_self_study_prompts': ( 'self_study_prompts.html#print_all_self_study_prompts',
65
+ 'ai_classroom_suite/SelfStudyPrompts.py')},
66
+ 'ai_classroom_suite.self_study_app': { 'ai_classroom_suite.self_study_app.SlightlyDelusionalTutor': ( 'gradio_application.html#slightlydelusionaltutor',
67
+ 'ai_classroom_suite/self_study_app.py'),
68
+ 'ai_classroom_suite.self_study_app.SlightlyDelusionalTutor.__init__': ( 'gradio_application.html#slightlydelusionaltutor.__init__',
69
+ 'ai_classroom_suite/self_study_app.py'),
70
+ 'ai_classroom_suite.self_study_app.SlightlyDelusionalTutor.add_user_message': ( 'gradio_application.html#slightlydelusionaltutor.add_user_message',
71
+ 'ai_classroom_suite/self_study_app.py'),
72
+ 'ai_classroom_suite.self_study_app.SlightlyDelusionalTutor.forget_conversation': ( 'gradio_application.html#slightlydelusionaltutor.forget_conversation',
73
+ 'ai_classroom_suite/self_study_app.py'),
74
+ 'ai_classroom_suite.self_study_app.SlightlyDelusionalTutor.get_sources_memory': ( 'gradio_application.html#slightlydelusionaltutor.get_sources_memory',
75
+ 'ai_classroom_suite/self_study_app.py'),
76
+ 'ai_classroom_suite.self_study_app.SlightlyDelusionalTutor.get_tutor_reply': ( 'gradio_application.html#slightlydelusionaltutor.get_tutor_reply',
77
+ 'ai_classroom_suite/self_study_app.py'),
78
+ 'ai_classroom_suite.self_study_app.SlightlyDelusionalTutor.initialize_llm': ( 'gradio_application.html#slightlydelusionaltutor.initialize_llm',
79
+ 'ai_classroom_suite/self_study_app.py'),
80
+ 'ai_classroom_suite.self_study_app.add_user_message': ( 'gradio_application.html#add_user_message',
81
+ 'ai_classroom_suite/self_study_app.py'),
82
+ 'ai_classroom_suite.self_study_app.create_reference_store': ( 'gradio_application.html#create_reference_store',
83
+ 'ai_classroom_suite/self_study_app.py'),
84
+ 'ai_classroom_suite.self_study_app.disable_until_done': ( 'gradio_application.html#disable_until_done',
85
+ 'ai_classroom_suite/self_study_app.py'),
86
+ 'ai_classroom_suite.self_study_app.embed_key': ( 'gradio_application.html#embed_key',
87
+ 'ai_classroom_suite/self_study_app.py'),
88
+ 'ai_classroom_suite.self_study_app.get_tutor_reply': ( 'gradio_application.html#get_tutor_reply',
89
+ 'ai_classroom_suite/self_study_app.py'),
90
+ 'ai_classroom_suite.self_study_app.prompt_select': ( 'gradio_application.html#prompt_select',
91
+ 'ai_classroom_suite/self_study_app.py'),
92
+ 'ai_classroom_suite.self_study_app.save_chatbot_dialogue': ( 'gradio_application.html#save_chatbot_dialogue',
93
+ 'ai_classroom_suite/self_study_app.py')}}}
lo-achievement/ai_classroom_suite/self_study_app.py ADDED
@@ -0,0 +1,358 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/gradio_application.ipynb.
2
+
3
+ # %% auto 0
4
+ __all__ = ['save_pdf', 'save_json', 'save_txt', 'save_csv', 'num_sources', 'css', 'save_chatbot_dialogue',
5
+ 'SlightlyDelusionalTutor', 'embed_key', 'create_reference_store', 'prompt_select', 'add_user_message',
6
+ 'get_tutor_reply', 'disable_until_done']
7
+
8
+ # %% ../nbs/gradio_application.ipynb 9
9
+ import gradio as gr
10
+ from functools import partial
11
+ import pandas as pd
12
+ import os
13
+
14
+ from .PromptInteractionBase import *
15
+ from .IOHelperUtilities import *
16
+ from .SelfStudyPrompts import *
17
+ from .MediaVectorStores import *
18
+
19
+ # %% ../nbs/gradio_application.ipynb 13
20
+ def save_chatbot_dialogue(chat_tutor, save_type):
21
+
22
+ formatted_convo = pd.DataFrame(chat_tutor.conversation_memory, columns=['user', 'chatbot'])
23
+
24
+ output_fname = f'tutoring_conversation.{save_type}'
25
+
26
+ if save_type == 'csv':
27
+ formatted_convo.to_csv(output_fname, index=False)
28
+ elif save_type == 'json':
29
+ formatted_convo.to_json(output_fname, orient='records')
30
+ elif save_type == 'txt':
31
+ temp = formatted_convo.apply(lambda x: 'User: {0}\nAI: {1}'.format(x[0], x[1]), axis=1)
32
+ temp = '\n\n'.join(temp.tolist())
33
+ with open(output_fname, 'w') as f:
34
+ f.write(temp)
35
+ else:
36
+ gr.update(value=None, visible=False)
37
+
38
+ return gr.update(value=output_fname, visible=True)
39
+
40
+ save_pdf = partial(save_chatbot_dialogue, save_type='pdf')
41
+ save_json = partial(save_chatbot_dialogue, save_type='json')
42
+ save_txt = partial(save_chatbot_dialogue, save_type='txt')
43
+ save_csv = partial(save_chatbot_dialogue, save_type='csv')
44
+
45
+
46
+ # %% ../nbs/gradio_application.ipynb 16
47
+ class SlightlyDelusionalTutor:
48
+ # create basic initialization function
49
+ def __init__(self, model_name = None):
50
+
51
+ # create default model name
52
+ if model_name is None:
53
+ self.model_name = 'gpt-3.5-turbo-16k'
54
+
55
+ self.chat_llm = None
56
+ self.tutor_chain = None
57
+ self.vector_store = None
58
+ self.vs_retriever = None
59
+ self.conversation_memory = []
60
+ self.sources_memory = []
61
+ self.flattened_conversation = ''
62
+ self.api_key_valid = False
63
+ self.learning_objectives = None
64
+ self.openai_auth = ''
65
+
66
+ def initialize_llm(self):
67
+
68
+ if self.openai_auth:
69
+ try:
70
+ self.chat_llm = create_model(self.model_name, openai_api_key = self.openai_auth)
71
+ self.api_key_valid = True
72
+ except Exception as e:
73
+ print(e)
74
+ self.api_key_valid = False
75
+ else:
76
+ print("Please provide an OpenAI API key and press Enter.")
77
+
78
+ def add_user_message(self, user_message):
79
+ self.conversation_memory.append([user_message, None])
80
+ self.flattened_conversation = self.flattened_conversation + '\n\n' + 'User: ' + user_message
81
+
82
+ def get_tutor_reply(self, **input_kwargs):
83
+
84
+ if not self.conversation_memory:
85
+ return "Please type something to start the conversation."
86
+
87
+ # we want to have a different vector comparison for reference lookup after the topic is first used
88
+ if len(self.conversation_memory) > 1:
89
+ if 'question' in input_kwargs.keys():
90
+ if input_kwargs['question']:
91
+ input_kwargs['question'] = self.conversation_memory[-1][0] + ' keeping in mind I want to learn about ' + input_kwargs['question']
92
+ else:
93
+ input_kwargs['question'] = self.conversation_memory[-1][0]
94
+
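+ # Build the request for the retrieval chain: the flattened conversation is prepended so the
+ # tutor first gives feedback on the previous answer, then addresses the newest user message.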
95
+ # get tutor message
96
+ tutor_message = get_tutoring_answer(None,
97
+ self.tutor_chain,
98
+ assessment_request = self.flattened_conversation + '\n\nFirst, please provide your feedback on my previous answer if I was answering a question; otherwise, respond appropriately to my statement. Then, help me with the following: ' + self.conversation_memory[-1][0],
99
+ learning_objectives = self.learning_objectives,
100
+ return_dict=True,
101
+ **input_kwargs)
102
+
103
+ # add tutor message to conversation memory
104
+ self.conversation_memory[-1][1] = tutor_message['answer']
105
+ self.flattened_conversation = self.flattened_conversation + '\nAI: ' + tutor_message['answer']
106
+ self.sources_memory.append(tutor_message['source_documents'])
107
+ #print(self.flattened_conversation, '\n\n')
108
+ print(tutor_message['source_documents'])
109
+
110
+ def get_sources_memory(self):
111
+ # retrieve last source
112
+ last_sources = self.sources_memory[-1]
113
+
114
+ # get page_content keyword from last_sources
115
+ doc_contents = ['Source ' + str(ind+1) + '\n"' + doc.page_content + '"\n\n' for ind, doc in enumerate(last_sources)]
116
+ doc_contents = ''.join(doc_contents)
117
+
118
+ return doc_contents
119
+
120
+ def forget_conversation(self):
121
+ self.conversation_memory = []
122
+ self.sources_memory = []
123
+ self.flattened_conversation = ''
124
+
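+ # Typical lifecycle (mirrored by the Gradio wiring below): set .openai_auth, call initialize_llm(),
+ # attach a vector store and tutor_chain, then alternate add_user_message() and get_tutor_reply().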
125
+ # %% ../nbs/gradio_application.ipynb 18
126
+ def embed_key(openai_api_key, chat_tutor):
127
+ if not openai_api_key:
128
+ return chat_tutor
129
+
130
+ # Otherwise, update key
131
+ os.environ["OPENAI_API_KEY"] = openai_api_key
132
+
133
+ #update tutor
134
+ chat_tutor.openai_auth = openai_api_key
135
+
136
+ if not chat_tutor.api_key_valid:
137
+ chat_tutor.initialize_llm()
138
+
139
+ return chat_tutor
140
+
141
+ # %% ../nbs/gradio_application.ipynb 20
142
+ def create_reference_store(chat_tutor, vs_button, text_cp, upload_files, reference_vs, openai_auth, learning_objs):
143
+
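+ # Builds a vector store from pasted text and/or uploaded files, then initializes the tutor chain.
+ # vs_button is only passed through from the Gradio wiring; the button is updated via the returned gr.update.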
144
+ text_segs = []
145
+ upload_segs = []
146
+
147
+ if reference_vs:
148
+ raise NotImplementedError("Reference Vector Stores are not yet implemented")
149
+
150
+ if text_cp.strip():
151
+ text_segs = get_document_segments(text_cp, 'text', chunk_size=700, chunk_overlap=100)
152
+ [doc.metadata.update({'source':'text box'}) for doc in text_segs];
153
+
154
+ if upload_files:
155
+ print(upload_files)
156
+ upload_fnames = [f.name for f in upload_files]
157
+ upload_segs = get_document_segments(upload_fnames, 'file', chunk_size=700, chunk_overlap=100)
158
+
159
+ # get the full list of everything
160
+ all_segs = text_segs + upload_segs
161
+ print(all_segs)
162
+
163
+ # create the vector store and update tutor
164
+ vs_db, vs_retriever = create_local_vector_store(all_segs, search_kwargs={"k": 2})
165
+ chat_tutor.vector_store = vs_db
166
+ chat_tutor.vs_retriever = vs_retriever
167
+
168
+ # create the tutor chain
169
+ if not chat_tutor.api_key_valid or not chat_tutor.openai_auth:
170
+ chat_tutor = embed_key(openai_auth, chat_tutor)
171
+ qa_chain = create_tutor_mdl_chain(kind="retrieval_qa", mdl=chat_tutor.chat_llm, retriever = chat_tutor.vs_retriever, return_source_documents=True)
172
+ chat_tutor.tutor_chain = qa_chain
173
+
174
+ # store learning objectives
175
+ chat_tutor.learning_objectives = learning_objs
176
+
177
+ # return the updated tutor state and re-enable the start button
178
+ return chat_tutor, gr.update(interactive=True, value='Tutor Initialized!')
179
+
180
+ # %% ../nbs/gradio_application.ipynb 22
181
+ ### Gradio Called Functions ###
182
+
183
+ def prompt_select(selection, number, length):
184
+ if selection == "Random":
185
+ prompt = f"Please design a {number} question quiz based on the context provided and the inputted learning objectives (if applicable). The types of questions should be randomized (including multiple choice, short answer, true/false, short answer, etc.). Provide one question at a time, and wait for my response before providing me with feedback. Again, while the quiz may ask for multiple questions, you should only provide 1 question in you initial response. Do not include the answer in your response. If I get an answer wrong, provide me with an explanation of why it was incorrect, and then give me additional chances to respond until I get the correct choice. Explain why the correct choice is right."
186
+ elif selection == "Fill in the Blank":
187
+ prompt = f"Create a {number} question fill in the blank quiz refrencing the context provided. The quiz should reflect the learning objectives (if inputted). The 'blank' part of the question should appear as '________'. The answers should reflect what word(s) should go in the blank an accurate statement. An example is the follow: 'The author of the article is ______.' The question should be a statement. Provide one question at a time, and wait for my response before providing me with feedback. Again, while the quiz may ask for multiple questions, you should only provide ONE question in you initial response. Do not include the answer in your response. If I get an answer wrong, provide me with an explanation of why it was incorrect,and then give me additional chances to respond until I get the correct choice. Explain why the correct choice is right."
188
+ elif selection == "Short Answer":
189
+ prompt = f"Please design a {number} question quiz about which reflects the learning objectives (if inputted). The questions should be short answer. Expect the correct answers to be {length} sentences long. Provide one question at a time, and wait for my response before providing me with feedback. Again, while the quiz may ask for multiple questions, you should only provide ONE question in you initial response. Do not include the answer in your response. If I get an answer wrong, provide me with an explanation of why it was incorrect, and then give me additional chances to respond until I get the correct choice. Explain why the correct answer is right."
190
+ else:
191
+ prompt = f"Please design a {number} question {selection.lower()} quiz based on the context provided and the inputted learning objectives (if applicable). Provide one question at a time, and wait for my response before providing me with feedback. Again, while the quiz may ask for multiple questions, you should only provide 1 question in you initial response. Do not include the answer in your response. If I get an answer wrong, provide me with an explanation of why it was incorrect, and then give me additional chances to respond until I get the correct choice. Explain why the correct choice is right."
192
+ return prompt, prompt
193
+
194
+
195
+ # %% ../nbs/gradio_application.ipynb 24
196
+ ### Chatbot Functions ###
197
+
198
+ def add_user_message(user_message, chat_tutor):
199
+ """Display user message and update chat history to include it.
200
+ Also disables user text input until the bot is finished (re-enabled when get_tutor_reply() returns).
201
+ See https://gradio.app/creating-a-chatbot/"""
202
+ chat_tutor.add_user_message(user_message)
203
+ return gr.update(value="", interactive=False), chat_tutor.conversation_memory, chat_tutor
204
+
205
+ def get_tutor_reply(learning_topic, chat_tutor):
206
+ chat_tutor.get_tutor_reply(question=learning_topic)
207
+ return gr.update(value="", interactive=True), gr.update(visible=True, value=chat_tutor.get_sources_memory()), chat_tutor.conversation_memory, chat_tutor
208
+
209
+ num_sources = 2
210
+
211
+ # %% ../nbs/gradio_application.ipynb 25
212
+ def disable_until_done(obj_in):
213
+ return gr.update(interactive=False)
214
+
215
+ # %% ../nbs/gradio_application.ipynb 27
216
+ # See https://gradio.app/custom-CSS-and-JS/
217
+ css="""
218
+ #sources-container {
219
+ overflow: scroll !important; /* Needs to override default formatting */
220
+ /*max-height: 20em; */ /* Arbitrary value */
221
+ }
222
+ #sources-container > div { padding-bottom: 1em !important; /* Arbitrary value */ }
223
+ .short-height > * > * { min-height: 0 !important; }
224
+ .translucent { opacity: 0.5; }
225
+ .textbox_label { padding-bottom: .5em; }
226
+ """
227
+ #srcs = [] # Reset sources (db and qa are kept the same for ease of testing)
228
+
229
+ with gr.Blocks(css=css, analytics_enabled=False) as demo:
230
+
231
+ #initialize tutor (with state)
232
+ study_tutor = gr.State(SlightlyDelusionalTutor())
233
+
234
+ # Title
235
+ gr.Markdown("# Studying with a Slightly Delusional Tutor")
236
+
237
+ # API Authentication functionality
238
+ with gr.Box():
239
+ gr.Markdown("### OpenAI API Key ")
240
+ gr.HTML("""<span>Embed your OpenAI API key below; if you haven't created one already, visit
241
+ <a href="https://platform.openai.com/account/api-keys">platform.openai.com/account/api-keys</a>
242
+ to sign up for an account and get your personal API key</span>""",
243
+ elem_classes="textbox_label")
244
+ api_input = gr.Textbox(show_label=False, type="password", container=False, autofocus=True,
245
+ placeholder="●●●●●●●●●●●●●●●●●", value='')
246
+ api_input.submit(fn=embed_key, inputs=[api_input, study_tutor], outputs=study_tutor)
247
+ api_input.blur(fn=embed_key, inputs=[api_input, study_tutor], outputs=study_tutor)
248
+
249
+ # Reference document functionality (building vector stores)
250
+ with gr.Box():
251
+ gr.Markdown("### Add Reference Documents")
252
+ # TODO Add entry for path to vector store (should be disabled for now)
253
+ with gr.Row(equal_height=True):
254
+ text_input = gr.TextArea(label='Copy and paste your text below',
255
+ lines=2)
256
+
257
+ file_input = gr.Files(label="Load a .txt or .pdf file",
258
+ file_types=['.pdf', '.txt'], type="file",
259
+ elem_classes="short-height")
260
+
261
+ instructor_input = gr.TextArea(label='Enter vector store URL, if given by instructor (WIP)', value='',
262
+ lines=2, interactive=False, elem_classes="translucent")
263
+
264
+ # Adding the learning objectives
265
+ with gr.Box():
266
+ gr.Markdown("### Optional: Enter Your Learning Objectives")
267
+ learning_objectives = gr.Textbox(label='If provided by your instructor, please input your learning objectives for this session', value='')
268
+
269
+ # Adding the button to submit all of the settings and create the Chat Tutor Chain.
270
+ with gr.Row():
271
+ vs_build_button = gr.Button(value = 'Start Studying with Your Tutor!', scale=1)
272
+ vs_build_button.click(disable_until_done, vs_build_button, vs_build_button) \
273
+ .then(create_reference_store, [study_tutor, vs_build_button, text_input, file_input, instructor_input, api_input, learning_objectives],
274
+ [study_tutor, vs_build_button])
275
+
276
+
277
+
278
+ # Premade question prompts
279
+ with gr.Box():
280
+ gr.Markdown("""
281
+ ## Generate a Premade Prompt
282
+ Select your type and number of desired questions. Click "Generate Prompt" to get your premade prompt,
283
+ and then "Insert Prompt into Chat" to copy the text into the chat interface below. \
284
+ You can also copy the prompt using the icon in the upper right corner and paste directly into the input box when interacting with the model.
285
+ """)
286
+ with gr.Row():
287
+ with gr.Column():
288
+ question_type = gr.Dropdown(["Multiple Choice", "True or False", "Short Answer", "Fill in the Blank", "Random"], label="Question Type")
289
+ number_of_questions = gr.Textbox(label="Enter desired number of questions")
290
+ sa_desired_length = gr.Dropdown(["1-2", "3-4", "5-6", "6 or more"], label = "For short answer questions only, choose the desired sentence length for answers. The default value is 1-2 sentences.")
291
+ with gr.Column():
292
+ prompt_button = gr.Button("Generate Prompt")
293
+ premade_prompt_output = gr.Textbox(label="Generated prompt (save or copy)", show_copy_button=True)
294
+
295
+
296
+ # Chatbot interface
297
+ gr.Markdown("## Chat with the Model")
298
+ topic_input = gr.Textbox(label="What topic or concept are you trying to learn more about?")
299
+ with gr.Row(equal_height=True):
300
+ with gr.Column(scale=2):
301
+ chatbot = gr.Chatbot()
302
+ with gr.Row():
303
+ user_chat_input = gr.Textbox(label="User input", scale=9)
304
+ user_chat_submit = gr.Button("Ask/answer model", scale=1)
305
+
306
+ # sources
307
+ with gr.Box(elem_id="sources-container", scale=1):
308
+ # TODO: Display document sources in a nicer format?
309
+ gr.HTML(value="<h3 id='sources'>Referenced Sources</h3>")
310
+ sources_output = gr.Textbox(value='', interactive=False, visible=False, show_label=False)
311
+ #sources_output = []
312
+ #for i in range(num_sources):
313
+ # source_elem = gr.HTML(visible=False)
314
+ # sources_output.append(source_elem)
315
+
316
+ #define the behavior of prompt button later since it depends on user_chat_input
317
+ prompt_button.click(prompt_select,
318
+ inputs=[question_type, number_of_questions, sa_desired_length],
319
+ outputs=[premade_prompt_output, user_chat_input])
320
+
321
+ # Display input and output in three-ish parts
322
+ # (using asynchronous functions):
323
+ # First show user input, then show model output when complete
324
+ # Then wait until the bot provides response and return the result
325
+ # Finally, allow the user to ask a new question by reenabling input
326
+ async_response = user_chat_submit.click(add_user_message,
327
+ [user_chat_input, study_tutor],
328
+ [user_chat_input, chatbot, study_tutor], queue=False) \
329
+ .then(get_tutor_reply, [topic_input, study_tutor], [user_chat_input, sources_output, chatbot, study_tutor], queue=True)
330
+
331
+ async_response_b = user_chat_input.submit(add_user_message,
332
+ [user_chat_input, study_tutor],
333
+ [user_chat_input, chatbot, study_tutor], queue=False) \
334
+ .then(get_tutor_reply, [topic_input, study_tutor], [user_chat_input, sources_output, chatbot, study_tutor], queue=True)
335
+
336
+ with gr.Blocks():
337
+ gr.Markdown("""
338
+ ## Export Your Chat History
339
+ Export your chat history as a .json, .pdf, .txt, or .csv file.
340
+ """)
341
+ with gr.Row():
342
+ export_dialogue_button_json = gr.Button("JSON")
343
+ export_dialogue_button_pdf = gr.Button("PDF")
344
+ export_dialogue_button_txt = gr.Button("TXT")
345
+ export_dialogue_button_csv = gr.Button("CSV")
346
+
347
+ file_download = gr.Files(label="Download here",
348
+ file_types=['.pdf', '.txt', '.csv', '.json'], type="file", visible=False)
349
+
350
+ export_dialogue_button_json.click(save_json, study_tutor, file_download, show_progress=True)
351
+ export_dialogue_button_pdf.click(save_pdf, study_tutor, file_download, show_progress=True)
352
+ export_dialogue_button_txt.click(save_txt, study_tutor, file_download, show_progress=True)
353
+ export_dialogue_button_csv.click(save_csv, study_tutor, file_download, show_progress=True)
354
+
355
+ demo.queue()
356
+ demo.launch(debug=True)
357
+ #demo.launch()
358
+ #gr.close_all()
lo-achievement/basic_UI_design_oral_exam.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
lo-achievement/grading_from_json.ipynb ADDED
@@ -0,0 +1,606 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "colab_type": "text",
7
+ "id": "view-in-github"
8
+ },
9
+ "source": [
10
+ "<a href=\"https://colab.research.google.com/github/vanderbilt-data-science/lo-achievement/blob/main/grading_from_json.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": null,
16
+ "metadata": {
17
+ "id": "kfO7rE64ZTI_"
18
+ },
19
+ "outputs": [],
20
+ "source": [
21
+ "!pip install openai"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 2,
27
+ "metadata": {
28
+ "id": "f26sZpe-MCCj"
29
+ },
30
+ "outputs": [],
31
+ "source": [
32
+ "import json\n",
33
+ "import openai\n",
34
+ "import os\n",
35
+ "import pandas as pd"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "code",
40
+ "execution_count": 4,
41
+ "metadata": {
42
+ "colab": {
43
+ "base_uri": "https://localhost:8080/",
44
+ "height": 614
45
+ },
46
+ "id": "BVTr_mR0XIJI",
47
+ "outputId": "897e41a0-d5e1-4b5f-d254-0a6e0f6aa3fa"
48
+ },
49
+ "outputs": [
50
+ {
51
+ "data": {
52
+ "text/html": [
53
+ "\n",
54
+ " <div id=\"df-e24b7014-4d98-4fc5-9ff1-07fa5c26ba5e\">\n",
55
+ " <div class=\"colab-df-container\">\n",
56
+ " <div>\n",
57
+ "<style scoped>\n",
58
+ " .dataframe tbody tr th:only-of-type {\n",
59
+ " vertical-align: middle;\n",
60
+ " }\n",
61
+ "\n",
62
+ " .dataframe tbody tr th {\n",
63
+ " vertical-align: top;\n",
64
+ " }\n",
65
+ "\n",
66
+ " .dataframe thead th {\n",
67
+ " text-align: right;\n",
68
+ " }\n",
69
+ "</style>\n",
70
+ "<table border=\"1\" class=\"dataframe\">\n",
71
+ " <thead>\n",
72
+ " <tr style=\"text-align: right;\">\n",
73
+ " <th></th>\n",
74
+ " <th>timestamp</th>\n",
75
+ " <th>author</th>\n",
76
+ " <th>message</th>\n",
77
+ " </tr>\n",
78
+ " </thead>\n",
79
+ " <tbody>\n",
80
+ " <tr>\n",
81
+ " <th>0</th>\n",
82
+ " <td>2023-06-07 08:16:00+00:00</td>\n",
83
+ " <td>assistant</td>\n",
84
+ " <td>Question 1:\\nWhich of the following statements...</td>\n",
85
+ " </tr>\n",
86
+ " <tr>\n",
87
+ " <th>1</th>\n",
88
+ " <td>2023-06-07 08:16:30+00:00</td>\n",
89
+ " <td>user</td>\n",
90
+ " <td>C</td>\n",
91
+ " </tr>\n",
92
+ " <tr>\n",
93
+ " <th>2</th>\n",
94
+ " <td>2023-06-07 08:17:00+00:00</td>\n",
95
+ " <td>assistant</td>\n",
96
+ " <td>Correct! Option C is the correct answer...</td>\n",
97
+ " </tr>\n",
98
+ " <tr>\n",
99
+ " <th>3</th>\n",
100
+ " <td>2023-06-07 08:17:30+00:00</td>\n",
101
+ " <td>assistant</td>\n",
102
+ " <td>Question 2:\\nWhich of the following expenses a...</td>\n",
103
+ " </tr>\n",
104
+ " <tr>\n",
105
+ " <th>4</th>\n",
106
+ " <td>2023-06-07 08:18:00+00:00</td>\n",
107
+ " <td>user</td>\n",
108
+ " <td>A</td>\n",
109
+ " </tr>\n",
110
+ " <tr>\n",
111
+ " <th>5</th>\n",
112
+ " <td>2023-06-07 08:18:30+00:00</td>\n",
113
+ " <td>assistant</td>\n",
114
+ " <td>I'm sorry, but your answer is incorrect...</td>\n",
115
+ " </tr>\n",
116
+ " <tr>\n",
117
+ " <th>6</th>\n",
118
+ " <td>2023-06-07 08:19:00+00:00</td>\n",
119
+ " <td>assistant</td>\n",
120
+ " <td>Question 2 (Revised):\\nWhich of the following ...</td>\n",
121
+ " </tr>\n",
122
+ " <tr>\n",
123
+ " <th>7</th>\n",
124
+ " <td>2023-06-07 08:19:30+00:00</td>\n",
125
+ " <td>user</td>\n",
126
+ " <td>D</td>\n",
127
+ " </tr>\n",
128
+ " <tr>\n",
129
+ " <th>8</th>\n",
130
+ " <td>2023-06-07 08:20:00+00:00</td>\n",
131
+ " <td>assistant</td>\n",
132
+ " <td>Correct! Option D is the correct answer...</td>\n",
133
+ " </tr>\n",
134
+ " <tr>\n",
135
+ " <th>9</th>\n",
136
+ " <td>2023-06-07 08:20:30+00:00</td>\n",
137
+ " <td>assistant</td>\n",
138
+ " <td>Question 3:\\nWhat is the purpose of capitalizi...</td>\n",
139
+ " </tr>\n",
140
+ " <tr>\n",
141
+ " <th>10</th>\n",
142
+ " <td>2023-06-07 08:21:00+00:00</td>\n",
143
+ " <td>user</td>\n",
144
+ " <td>C</td>\n",
145
+ " </tr>\n",
146
+ " <tr>\n",
147
+ " <th>11</th>\n",
148
+ " <td>2023-06-07 08:21:30+00:00</td>\n",
149
+ " <td>assistant</td>\n",
150
+ " <td>Correct! Option C is the correct answer...</td>\n",
151
+ " </tr>\n",
152
+ " <tr>\n",
153
+ " <th>12</th>\n",
154
+ " <td>2023-06-07 08:22:00+00:00</td>\n",
155
+ " <td>assistant</td>\n",
156
+ " <td>Question 4:\\nWhich financial statement provide...</td>\n",
157
+ " </tr>\n",
158
+ " <tr>\n",
159
+ " <th>13</th>\n",
160
+ " <td>2023-06-07 08:22:30+00:00</td>\n",
161
+ " <td>user</td>\n",
162
+ " <td>C</td>\n",
163
+ " </tr>\n",
164
+ " <tr>\n",
165
+ " <th>14</th>\n",
166
+ " <td>2023-06-07 08:23:00+00:00</td>\n",
167
+ " <td>assistant</td>\n",
168
+ " <td>Correct! Option C is the correct answer...</td>\n",
169
+ " </tr>\n",
170
+ " <tr>\n",
171
+ " <th>15</th>\n",
172
+ " <td>2023-06-07 08:23:30+00:00</td>\n",
173
+ " <td>assistant</td>\n",
174
+ " <td>Question 5:\\nWhat is the purpose of the matchi...</td>\n",
175
+ " </tr>\n",
176
+ " <tr>\n",
177
+ " <th>16</th>\n",
178
+ " <td>2023-06-07 08:24:00+00:00</td>\n",
179
+ " <td>user</td>\n",
180
+ " <td>B</td>\n",
181
+ " </tr>\n",
182
+ " <tr>\n",
183
+ " <th>17</th>\n",
184
+ " <td>2023-06-07 08:24:30+00:00</td>\n",
185
+ " <td>assistant</td>\n",
186
+ " <td>Correct! Option B is the correct answer...</td>\n",
187
+ " </tr>\n",
188
+ " </tbody>\n",
189
+ "</table>\n",
190
+ "</div>\n",
191
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-e24b7014-4d98-4fc5-9ff1-07fa5c26ba5e')\"\n",
192
+ " title=\"Convert this dataframe to an interactive table.\"\n",
193
+ " style=\"display:none;\">\n",
194
+ " \n",
195
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
196
+ " width=\"24px\">\n",
197
+ " <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
198
+ " <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
199
+ " </svg>\n",
200
+ " </button>\n",
201
+ " \n",
202
+ " <style>\n",
203
+ " .colab-df-container {\n",
204
+ " display:flex;\n",
205
+ " flex-wrap:wrap;\n",
206
+ " gap: 12px;\n",
207
+ " }\n",
208
+ "\n",
209
+ " .colab-df-convert {\n",
210
+ " background-color: #E8F0FE;\n",
211
+ " border: none;\n",
212
+ " border-radius: 50%;\n",
213
+ " cursor: pointer;\n",
214
+ " display: none;\n",
215
+ " fill: #1967D2;\n",
216
+ " height: 32px;\n",
217
+ " padding: 0 0 0 0;\n",
218
+ " width: 32px;\n",
219
+ " }\n",
220
+ "\n",
221
+ " .colab-df-convert:hover {\n",
222
+ " background-color: #E2EBFA;\n",
223
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
224
+ " fill: #174EA6;\n",
225
+ " }\n",
226
+ "\n",
227
+ " [theme=dark] .colab-df-convert {\n",
228
+ " background-color: #3B4455;\n",
229
+ " fill: #D2E3FC;\n",
230
+ " }\n",
231
+ "\n",
232
+ " [theme=dark] .colab-df-convert:hover {\n",
233
+ " background-color: #434B5C;\n",
234
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
235
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
236
+ " fill: #FFFFFF;\n",
237
+ " }\n",
238
+ " </style>\n",
239
+ "\n",
240
+ " <script>\n",
241
+ " const buttonEl =\n",
242
+ " document.querySelector('#df-e24b7014-4d98-4fc5-9ff1-07fa5c26ba5e button.colab-df-convert');\n",
243
+ " buttonEl.style.display =\n",
244
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
245
+ "\n",
246
+ " async function convertToInteractive(key) {\n",
247
+ " const element = document.querySelector('#df-e24b7014-4d98-4fc5-9ff1-07fa5c26ba5e');\n",
248
+ " const dataTable =\n",
249
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
250
+ " [key], {});\n",
251
+ " if (!dataTable) return;\n",
252
+ "\n",
253
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
254
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
255
+ " + ' to learn more about interactive tables.';\n",
256
+ " element.innerHTML = '';\n",
257
+ " dataTable['output_type'] = 'display_data';\n",
258
+ " await google.colab.output.renderOutput(dataTable, element);\n",
259
+ " const docLink = document.createElement('div');\n",
260
+ " docLink.innerHTML = docLinkHtml;\n",
261
+ " element.appendChild(docLink);\n",
262
+ " }\n",
263
+ " </script>\n",
264
+ " </div>\n",
265
+ " </div>\n",
266
+ " "
267
+ ],
268
+ "text/plain": [
269
+ " timestamp author \\\n",
270
+ "0 2023-06-07 08:16:00+00:00 assistant \n",
271
+ "1 2023-06-07 08:16:30+00:00 user \n",
272
+ "2 2023-06-07 08:17:00+00:00 assistant \n",
273
+ "3 2023-06-07 08:17:30+00:00 assistant \n",
274
+ "4 2023-06-07 08:18:00+00:00 user \n",
275
+ "5 2023-06-07 08:18:30+00:00 assistant \n",
276
+ "6 2023-06-07 08:19:00+00:00 assistant \n",
277
+ "7 2023-06-07 08:19:30+00:00 user \n",
278
+ "8 2023-06-07 08:20:00+00:00 assistant \n",
279
+ "9 2023-06-07 08:20:30+00:00 assistant \n",
280
+ "10 2023-06-07 08:21:00+00:00 user \n",
281
+ "11 2023-06-07 08:21:30+00:00 assistant \n",
282
+ "12 2023-06-07 08:22:00+00:00 assistant \n",
283
+ "13 2023-06-07 08:22:30+00:00 user \n",
284
+ "14 2023-06-07 08:23:00+00:00 assistant \n",
285
+ "15 2023-06-07 08:23:30+00:00 assistant \n",
286
+ "16 2023-06-07 08:24:00+00:00 user \n",
287
+ "17 2023-06-07 08:24:30+00:00 assistant \n",
288
+ "\n",
289
+ " message \n",
290
+ "0 Question 1:\\nWhich of the following statements... \n",
291
+ "1 C \n",
292
+ "2 Correct! Option C is the correct answer... \n",
293
+ "3 Question 2:\\nWhich of the following expenses a... \n",
294
+ "4 A \n",
295
+ "5 I'm sorry, but your answer is incorrect... \n",
296
+ "6 Question 2 (Revised):\\nWhich of the following ... \n",
297
+ "7 D \n",
298
+ "8 Correct! Option D is the correct answer... \n",
299
+ "9 Question 3:\\nWhat is the purpose of capitalizi... \n",
300
+ "10 C \n",
301
+ "11 Correct! Option C is the correct answer... \n",
302
+ "12 Question 4:\\nWhich financial statement provide... \n",
303
+ "13 C \n",
304
+ "14 Correct! Option C is the correct answer... \n",
305
+ "15 Question 5:\\nWhat is the purpose of the matchi... \n",
306
+ "16 B \n",
307
+ "17 Correct! Option B is the correct answer... "
308
+ ]
309
+ },
310
+ "execution_count": 4,
311
+ "metadata": {},
312
+ "output_type": "execute_result"
313
+ }
314
+ ],
315
+ "source": [
316
+ "df = pd.read_json('demo_json.json')\n",
317
+ "pd.read_json('demo_json.json')"
318
+ ]
319
+ },
320
+ {
321
+ "cell_type": "code",
322
+ "execution_count": 5,
323
+ "metadata": {
324
+ "id": "anSNlvqlXh6i"
325
+ },
326
+ "outputs": [],
327
+ "source": [
328
+ "openai.api_key = \"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\""
329
+ ]
330
+ },
331
+ {
332
+ "cell_type": "code",
333
+ "execution_count": 8,
334
+ "metadata": {
335
+ "colab": {
336
+ "base_uri": "https://localhost:8080/",
337
+ "height": 627
338
+ },
339
+ "id": "udujJrX6SryU",
340
+ "outputId": "9b182162-7c1c-4d5a-be56-16947ddcda33"
341
+ },
342
+ "outputs": [
343
+ {
344
+ "data": {
345
+ "text/html": [
346
+ "\n",
347
+ " <div id=\"df-5123f950-1dca-46a6-be4d-dab5de1f8899\">\n",
348
+ " <div class=\"colab-df-container\">\n",
349
+ " <div>\n",
350
+ "<style scoped>\n",
351
+ " .dataframe tbody tr th:only-of-type {\n",
352
+ " vertical-align: middle;\n",
353
+ " }\n",
354
+ "\n",
355
+ " .dataframe tbody tr th {\n",
356
+ " vertical-align: top;\n",
357
+ " }\n",
358
+ "\n",
359
+ " .dataframe thead th {\n",
360
+ " text-align: right;\n",
361
+ " }\n",
362
+ "</style>\n",
363
+ "<table border=\"1\" class=\"dataframe\">\n",
364
+ " <thead>\n",
365
+ " <tr style=\"text-align: right;\">\n",
366
+ " <th></th>\n",
367
+ " <th>Question</th>\n",
368
+ " <th>Correct Answer</th>\n",
369
+ " <th>User Answer</th>\n",
370
+ " <th>Evaluation</th>\n",
371
+ " <th>Score</th>\n",
372
+ " </tr>\n",
373
+ " </thead>\n",
374
+ " <tbody>\n",
375
+ " <tr>\n",
376
+ " <th>0</th>\n",
377
+ " <td>Question 1:\\nWhich of the following statements...</td>\n",
378
+ " <td>C</td>\n",
379
+ " <td>C</td>\n",
380
+ " <td>correct.</td>\n",
381
+ " <td>1</td>\n",
382
+ " </tr>\n",
383
+ " <tr>\n",
384
+ " <th>1</th>\n",
385
+ " <td>Question 2 (Revised):\\nWhich of the following ...</td>\n",
386
+ " <td>D</td>\n",
387
+ " <td>D</td>\n",
388
+ " <td>incorrect. the correct answer is d, software d...</td>\n",
389
+ " <td>1</td>\n",
390
+ " </tr>\n",
391
+ " <tr>\n",
392
+ " <th>2</th>\n",
393
+ " <td>Question 3:\\nWhat is the purpose of capitalizi...</td>\n",
394
+ " <td>C</td>\n",
395
+ " <td>C</td>\n",
396
+ " <td>incorrect. the correct answer is b.</td>\n",
397
+ " <td>1</td>\n",
398
+ " </tr>\n",
399
+ " <tr>\n",
400
+ " <th>3</th>\n",
401
+ " <td>Question 4:\\nWhich financial statement provide...</td>\n",
402
+ " <td>C</td>\n",
403
+ " <td>C</td>\n",
404
+ " <td>correct</td>\n",
405
+ " <td>2</td>\n",
406
+ " </tr>\n",
407
+ " <tr>\n",
408
+ " <th>4</th>\n",
409
+ " <td>Question 5:\\nWhat is the purpose of the matchi...</td>\n",
410
+ " <td>B</td>\n",
411
+ " <td>B</td>\n",
412
+ " <td>correct</td>\n",
413
+ " <td>3</td>\n",
414
+ " </tr>\n",
415
+ " </tbody>\n",
416
+ "</table>\n",
417
+ "</div>\n",
418
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-5123f950-1dca-46a6-be4d-dab5de1f8899')\"\n",
419
+ " title=\"Convert this dataframe to an interactive table.\"\n",
420
+ " style=\"display:none;\">\n",
421
+ " \n",
422
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
423
+ " width=\"24px\">\n",
424
+ " <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
425
+ " <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
426
+ " </svg>\n",
427
+ " </button>\n",
428
+ " \n",
429
+ " <style>\n",
430
+ " .colab-df-container {\n",
431
+ " display:flex;\n",
432
+ " flex-wrap:wrap;\n",
433
+ " gap: 12px;\n",
434
+ " }\n",
435
+ "\n",
436
+ " .colab-df-convert {\n",
437
+ " background-color: #E8F0FE;\n",
438
+ " border: none;\n",
439
+ " border-radius: 50%;\n",
440
+ " cursor: pointer;\n",
441
+ " display: none;\n",
442
+ " fill: #1967D2;\n",
443
+ " height: 32px;\n",
444
+ " padding: 0 0 0 0;\n",
445
+ " width: 32px;\n",
446
+ " }\n",
447
+ "\n",
448
+ " .colab-df-convert:hover {\n",
449
+ " background-color: #E2EBFA;\n",
450
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
451
+ " fill: #174EA6;\n",
452
+ " }\n",
453
+ "\n",
454
+ " [theme=dark] .colab-df-convert {\n",
455
+ " background-color: #3B4455;\n",
456
+ " fill: #D2E3FC;\n",
457
+ " }\n",
458
+ "\n",
459
+ " [theme=dark] .colab-df-convert:hover {\n",
460
+ " background-color: #434B5C;\n",
461
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
462
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
463
+ " fill: #FFFFFF;\n",
464
+ " }\n",
465
+ " </style>\n",
466
+ "\n",
467
+ " <script>\n",
468
+ " const buttonEl =\n",
469
+ " document.querySelector('#df-5123f950-1dca-46a6-be4d-dab5de1f8899 button.colab-df-convert');\n",
470
+ " buttonEl.style.display =\n",
471
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
472
+ "\n",
473
+ " async function convertToInteractive(key) {\n",
474
+ " const element = document.querySelector('#df-5123f950-1dca-46a6-be4d-dab5de1f8899');\n",
475
+ " const dataTable =\n",
476
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
477
+ " [key], {});\n",
478
+ " if (!dataTable) return;\n",
479
+ "\n",
480
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
481
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
482
+ " + ' to learn more about interactive tables.';\n",
483
+ " element.innerHTML = '';\n",
484
+ " dataTable['output_type'] = 'display_data';\n",
485
+ " await google.colab.output.renderOutput(dataTable, element);\n",
486
+ " const docLink = document.createElement('div');\n",
487
+ " docLink.innerHTML = docLinkHtml;\n",
488
+ " element.appendChild(docLink);\n",
489
+ " }\n",
490
+ " </script>\n",
491
+ " </div>\n",
492
+ " </div>\n",
493
+ " "
494
+ ],
495
+ "text/plain": [
496
+ " Question Correct Answer \\\n",
497
+ "0 Question 1:\\nWhich of the following statements... C \n",
498
+ "1 Question 2 (Revised):\\nWhich of the following ... D \n",
499
+ "2 Question 3:\\nWhat is the purpose of capitalizi... C \n",
500
+ "3 Question 4:\\nWhich financial statement provide... C \n",
501
+ "4 Question 5:\\nWhat is the purpose of the matchi... B \n",
502
+ "\n",
503
+ " User Answer Evaluation Score \n",
504
+ "0 C correct. 1 \n",
505
+ "1 D incorrect. the correct answer is d, software d... 1 \n",
506
+ "2 C incorrect. the correct answer is b. 1 \n",
507
+ "3 C correct 2 \n",
508
+ "4 B correct 3 "
509
+ ]
510
+ },
511
+ "execution_count": 8,
512
+ "metadata": {},
513
+ "output_type": "execute_result"
514
+ }
515
+ ],
516
+ "source": [
517
+ "# Initialize necessary variables\n",
518
+ "prompt = \"\"\n",
519
+ "question = \"\"\n",
520
+ "correct_answer = \"\"\n",
521
+ "user_answer = \"\"\n",
522
+ "\n",
523
+ "# Initialize score\n",
524
+ "score = 0\n",
525
+ "\n",
526
+ "# Initialize an empty list to hold row data\n",
527
+ "row_data = []\n",
528
+ "\n",
529
+ "for index, row in df.iterrows():\n",
530
+ " author = row['author']\n",
531
+ " message = row['message']\n",
532
+ "\n",
533
+ " # Choose the appropriate prompt based on the author\n",
534
+ " if author == 'assistant':\n",
535
+ " if 'Question' in message:\n",
536
+ " question = message\n",
537
+ " user_answer = '' # Reset user_answer after a new question\n",
538
+ " elif 'Correct! Option' in message:\n",
539
+ " correct_answer = message.split('Option ')[1][0]\n",
540
+ " if user_answer: # If user_answer exists, make the API call\n",
541
+ " prompt = f\"Given the following question:\\n{question}\\nThe student responded with: {user_answer}\\nIs the student's response correct or incorrect?\"\n",
542
+ "\n",
543
+ " # Make an API call to OpenAI\n",
544
+ " api_response = openai.Completion.create(\n",
545
+ " engine='text-davinci-003',\n",
546
+ " prompt=prompt,\n",
547
+ " max_tokens=100,\n",
548
+ " temperature=0.7,\n",
549
+ " n=1,\n",
550
+ " stop=None\n",
551
+ " )\n",
552
+ "\n",
553
+ " # Extract and evaluate the generated response\n",
554
+ " generated_response = api_response.choices[0].text.strip().lower()\n",
555
+ "\n",
556
+ " # Update score based on generated_response\n",
557
+ " if 'correct' in generated_response and 'incorrect' not in generated_response:\n",
558
+ " score += 1\n",
559
+ "\n",
560
+ " # Create a dictionary for the current row\n",
561
+ " row_dict = {\n",
562
+ " 'Question': question,\n",
563
+ " 'Correct Answer': correct_answer,\n",
564
+ " 'User Answer': user_answer,\n",
565
+ " 'Evaluation': generated_response,\n",
566
+ " 'Score': score\n",
567
+ " }\n",
568
+ " # Append the row dictionary to row_data\n",
569
+ " row_data.append(row_dict)\n",
570
+ "\n",
571
+ " elif author == 'user':\n",
572
+ " user_answer = message\n",
573
+ "\n",
574
+ "# Create a DataFrame from row_data\n",
575
+ "output_df = pd.DataFrame(row_data)\n",
576
+ "output_df\n"
577
+ ]
578
+ }
579
+ ],
580
+ "metadata": {
581
+ "colab": {
582
+ "authorship_tag": "ABX9TyOn+FniXzrkHNKH5uAKgyUD",
583
+ "include_colab_link": true,
584
+ "provenance": []
585
+ },
586
+ "kernelspec": {
587
+ "display_name": "Python 3 (ipykernel)",
588
+ "language": "python",
589
+ "name": "python3"
590
+ },
591
+ "language_info": {
592
+ "codemirror_mode": {
593
+ "name": "ipython",
594
+ "version": 3
595
+ },
596
+ "file_extension": ".py",
597
+ "mimetype": "text/x-python",
598
+ "name": "python",
599
+ "nbconvert_exporter": "python",
600
+ "pygments_lexer": "ipython3",
601
+ "version": "3.8.16"
602
+ }
603
+ },
604
+ "nbformat": 4,
605
+ "nbformat_minor": 4
606
+ }
lo-achievement/instructor_intr_notebook.ipynb ADDED
@@ -0,0 +1,3153 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "collapsed": true,
7
+ "id": "brzvVeAsYiG2"
8
+ },
9
+ "source": [
10
+ "<a href=\"https://colab.research.google.com/github/vanderbilt-data-science/lo-achievement/blob/main/instructor_intr_notebook.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "markdown",
15
+ "metadata": {
16
+ "id": "WMKrKfx8_3fc"
17
+ },
18
+ "source": [
19
+ "# Instructor Grading and Assessment\n",
20
+ "This notebook executes grading of student submissions of chats with ChatGPT, exported in JSON. Run each cell should be run in order, and follow the prompts displayed when appropriate."
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 35,
26
+ "metadata": {
27
+ "colab": {
28
+ "base_uri": "https://localhost:8080/",
29
+ "height": 16
30
+ },
31
+ "id": "696FqPrTYiG3",
32
+ "outputId": "9679a415-8ab7-4c5f-e715-954d6801b6ec"
33
+ },
34
+ "outputs": [
35
+ {
36
+ "data": {
37
+ "text/html": [
38
+ "\n",
39
+ " <style>\n",
40
+ " pre {\n",
41
+ " white-space: pre-wrap;\n",
42
+ " }\n",
43
+ " </style>\n",
44
+ " "
45
+ ],
46
+ "text/plain": [
47
+ "<IPython.core.display.HTML object>"
48
+ ]
49
+ },
50
+ "metadata": {},
51
+ "output_type": "display_data"
52
+ }
53
+ ],
54
+ "source": [
55
+ "import ipywidgets as widgets\n",
56
+ "from IPython.display import display, HTML, clear_output\n",
57
+ "import io\n",
58
+ "import zipfile\n",
59
+ "import os\n",
60
+ "import json\n",
61
+ "import pandas as pd\n",
62
+ "import glob\n",
63
+ "from getpass import getpass"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": 36,
69
+ "metadata": {
70
+ "colab": {
71
+ "base_uri": "https://localhost:8080/",
72
+ "height": 16
73
+ },
74
+ "id": "fTlnrMwmYiG4",
75
+ "outputId": "e811e000-e9ec-43b6-d136-59d5134adeaf"
76
+ },
77
+ "outputs": [
78
+ {
79
+ "data": {
80
+ "text/html": [
81
+ "\n",
82
+ " <style>\n",
83
+ " pre {\n",
84
+ " white-space: pre-wrap;\n",
85
+ " }\n",
86
+ " </style>\n",
87
+ " "
88
+ ],
89
+ "text/plain": [
90
+ "<IPython.core.display.HTML object>"
91
+ ]
92
+ },
93
+ "metadata": {},
94
+ "output_type": "display_data"
95
+ }
96
+ ],
97
+ "source": [
98
+ "# \"global\" variables modified by mutability\n",
99
+ "grade_settings = {'learning_objectives':None,\n",
100
+ " 'json_file_path':None,\n",
101
+ " 'json_files':None }"
102
+ ]
103
+ },
104
+ {
105
+ "cell_type": "markdown",
106
+ "metadata": {
107
+ "id": "jb0jnIE14Vuh"
108
+ },
109
+ "source": [
110
+ "The `InstructorGradingConfig` holds the contents of the instantiated object including making graindg settings, extracting files from a zip archive, loading JSON files into DataFrames, and displaying relevant information in the output widget."
111
+ ]
112
+ },
113
+ {
114
+ "cell_type": "code",
115
+ "execution_count": 37,
116
+ "metadata": {
117
+ "colab": {
118
+ "base_uri": "https://localhost:8080/",
119
+ "height": 16
120
+ },
121
+ "id": "mPLdaWiuYiG4",
122
+ "outputId": "7a698bc1-7954-44ac-83c8-71d1dc410749"
123
+ },
124
+ "outputs": [
125
+ {
126
+ "data": {
127
+ "text/html": [
128
+ "\n",
129
+ " <style>\n",
130
+ " pre {\n",
131
+ " white-space: pre-wrap;\n",
132
+ " }\n",
133
+ " </style>\n",
134
+ " "
135
+ ],
136
+ "text/plain": [
137
+ "<IPython.core.display.HTML object>"
138
+ ]
139
+ },
140
+ "metadata": {},
141
+ "output_type": "display_data"
142
+ }
143
+ ],
144
+ "source": [
145
+ "class InstructorGradingConfig:\n",
146
+ " def __init__(self):\n",
147
+ " # layouts to help with styling\n",
148
+ " self.items_layout = widgets.Layout(width='auto')\n",
149
+ "\n",
150
+ " self.box_layout = widgets.Layout(display='flex',\n",
151
+ " flex_flow='column',\n",
152
+ " align_items='stretch',\n",
153
+ " width='50%',\n",
154
+ " border='solid 1px gray',\n",
155
+ " padding='0px 30px 20px 30px')\n",
156
+ "\n",
157
+ " # Create all components\n",
158
+ " self.ui_title = widgets.HTML(value=\"<h2>Instructor Grading Configuration</h2>\")\n",
159
+ "\n",
160
+ " self.run_button = widgets.Button(description='Submit', button_style='success', icon='check')\n",
161
+ " self.status_output = widgets.Output()\n",
162
+ " self.status_output.append_stdout('Waiting...')\n",
163
+ "\n",
164
+ " # Setup click behavior\n",
165
+ " self.run_button.on_click(self._setup_environment)\n",
166
+ "\n",
167
+ " # Reset rest of state\n",
168
+ " self.reset_state()\n",
169
+ "\n",
170
+ " def reset_state(self, close_all=False):\n",
171
+ "\n",
172
+ " if close_all:\n",
173
+ " self.learning_objectives_text.close()\n",
174
+ " self.file_upload.close()\n",
175
+ " self.file_upload_box.close()\n",
176
+ " #self.ui_container.close()\n",
177
+ "\n",
178
+ " self.learning_objectives_text = widgets.Textarea(value='', description='Learning Objectives',\n",
179
+ " placeholder='Learning objectives: 1. Understand and implement classes in object-oriented programming',\n",
180
+ " layout=self.items_layout,\n",
181
+ " style={'description_width': 'initial'})\n",
182
+ " self.file_upload = widgets.FileUpload(\n",
183
+ " accept='.zip', # Accepted file extension e.g. '.txt', '.pdf', 'image/*', 'image/*,.pdf'\n",
184
+ " multiple=False # True to accept multiple files upload else False\n",
185
+ " )\n",
186
+ " self.file_upload_box = widgets.HBox([widgets.Label('Upload User Files:\\t'), self.file_upload])\n",
187
+ "\n",
188
+ "\n",
189
+ " # Create a VBox container to arrange the widgets vertically\n",
190
+ " self.ui_container = widgets.VBox([self.ui_title, self.learning_objectives_text,\n",
191
+ " self.file_upload_box, self.run_button, self.status_output],\n",
192
+ " layout=self.box_layout)\n",
193
+ "\n",
194
+ "\n",
195
+ " def _setup_environment(self, btn):\n",
196
+ " grade_settings['learning_objectives'] = self.learning_objectives_text.value\n",
197
+ " grade_settings['json_file_path'] = self.file_upload.value\n",
198
+ "\n",
199
+ " if self.file_upload.value:\n",
200
+ " try:\n",
201
+ " input_file = list(self.file_upload.value.values())[0]\n",
202
+ " extracted_zip_dir = list(grade_settings['json_file_path'].keys())[0][:-4]\n",
203
+ " except:\n",
204
+ " input_file = self.file_upload.value[0]\n",
205
+ " extracted_zip_dir = self.file_upload.value[0]['name'][:-4]\n",
206
+ "\n",
207
+ " self.status_output.clear_output()\n",
208
+ " self.status_output.append_stdout('Loading zip file...\\n')\n",
209
+ "\n",
210
+ " with zipfile.ZipFile(io.BytesIO(input_file['content']), \"r\") as z:\n",
211
+ " z.extractall()\n",
212
+ " extracted_files = z.namelist()\n",
213
+ "\n",
214
+ " self.status_output.append_stdout('Extracted files and directories: {0}\\n'.format(', '.join(extracted_files)))\n",
215
+ "\n",
216
+ " # load all json files\n",
217
+ " grade_settings['json_files'] = glob.glob(''.join([extracted_zip_dir, '/**/*.json']), recursive=True)\n",
218
+ "\n",
219
+ " #status_output.clear_output()\n",
220
+ " self.status_output.append_stdout('Loading successful!\\nLearning Objectives: {0}\\nExtracted JSON files: {1}'.format(grade_settings['learning_objectives'],\n",
221
+ " ', '.join(grade_settings['json_files'])))\n",
222
+ "\n",
223
+ " else:\n",
224
+ " self.status_output.clear_output()\n",
225
+ " self.status_output.append_stdout('Please upload a zip file.')\n",
226
+ "\n",
227
+ " # Clear values so they're not saved\n",
228
+ " self.learning_objectives_text.value = ''\n",
229
+ " self.reset_state(close_all=True)\n",
230
+ " self.run_ui_container()\n",
231
+ "\n",
232
+ " with self.status_output:\n",
233
+ " print('Extracted files and directories: {0}\\n'.format(', '.join(extracted_files)))\n",
234
+ " print('Loading successful!\\nLearning Objectives: {0}\\nExtracted JSON files: {1}'.format(grade_settings['learning_objectives'],\n",
235
+ " ', '.join(grade_settings['json_files'])))\n",
236
+ " print('Submitted and Reset all values.')\n",
237
+ "\n",
238
+ "\n",
239
+ " def run_ui_container(self):\n",
240
+ " display(self.ui_container, clear=True)"
241
+ ]
242
+ },
243
+ {
244
+ "cell_type": "code",
245
+ "execution_count": null,
246
+ "metadata": {
247
+ "colab": {
248
+ "base_uri": "https://localhost:8080/",
249
+ "height": 16
250
+ },
251
+ "id": "4wCQ4Wk8YiG4",
252
+ "outputId": "5c602e80-a210-4449-fd6c-eb8bf3213407"
253
+ },
254
+ "outputs": [
255
+ {
256
+ "data": {
257
+ "text/html": [
258
+ "\n",
259
+ " <style>\n",
260
+ " pre {\n",
261
+ " white-space: pre-wrap;\n",
262
+ " }\n",
263
+ " </style>\n",
264
+ " "
265
+ ],
266
+ "text/plain": [
267
+ "<IPython.core.display.HTML object>"
268
+ ]
269
+ },
270
+ "metadata": {},
271
+ "output_type": "display_data"
272
+ }
273
+ ],
274
+ "source": [
275
+ "#This code helps in the case that we have problems with metadata being retained.\n",
276
+ "#!jupyter nbconvert --ClearOutputPreprocessor.enabled=True --ClearMetadataPreprocessor.enabled=True --ClearMetadataPreprocessor.preserve_cell_metadata_mask \"colab\" --ClearMetadataPreprocessor.preserve_cell_metadata_mask \"kernelspec\" --ClearMetadataPreprocessor.preserve_cell_metadata_mask \"language_info\" --to=notebook --output=instructor_inst_notebook.ipynb instructor_intr_notebook.ipynb"
277
+ ]
278
+ },
279
+ {
280
+ "cell_type": "markdown",
281
+ "metadata": {
282
+ "id": "gj1K3MjHDlqb"
283
+ },
284
+ "source": [
285
+ "# User Settings and Submission Upload\n",
286
+ "The following two cells will ask you for your OpenAI API credentials and to upload the json file of the student submission."
287
+ ]
288
+ },
289
+ {
290
+ "cell_type": "code",
291
+ "execution_count": 4,
292
+ "metadata": {
293
+ "colab": {
294
+ "base_uri": "https://localhost:8080/",
295
+ "height": 519,
296
+ "referenced_widgets": [
297
+ "a84d31fb8f4e4bafb74035158834b404",
298
+ "b051a90758434644955747bc02d00bab",
299
+ "252b8009f3734ed2908049ebb40c0247",
300
+ "6622f76f91f44527a87a7575bbd388d2",
301
+ "654ab6d155eb457ea5c719a9ac27ad5b",
302
+ "86cb4f568f454ff8832face502fb0745",
303
+ "e30fe87f01bc4580a61713b5b72439a2",
304
+ "d16b25c7e9e948938c9303fbe8ae3dcc",
305
+ "453b12da4b6540cd9e4e57f73a4d670c",
306
+ "b74cf92175374028948d4cf529d4d1e6",
307
+ "f7d75b0a32554a9589c513336fc30095",
308
+ "7f7164e80a464ba9b99f96c10132db25",
309
+ "49f80567705147f0b82d45b7f06dd1ba",
310
+ "5a17f4509d194105b23dd616e45183d5",
311
+ "81c4dda35a7d4e15821bb4bc0973354e",
312
+ "df1c46361f714aceb9c046f98fede40c",
313
+ "60b80d550efa403a825a3cb913c26f53",
314
+ "d0bd0e3f12594ff1a51365b65a3fcc43",
315
+ "dfa8d6c7d70b42468cbda035de89404c",
316
+ "26d13984d45745858d3b890bc7f18a90",
317
+ "53722998fbe64a7c94829b79e8cd69d6",
318
+ "1b7ee0de15484cd5aecd6d8ca3b6ee9d",
319
+ "dde20647d3594d31b66b19659f53a95e",
320
+ "8610fffd2d2a4ec28f8c874c06073ce7",
321
+ "54e3918921f44fb4a9020beab951fcdf",
322
+ "1072a8a142f64dfd96ee528a2e9d1595",
323
+ "67b4083cd4234f52bb7cca27ab9cddb3",
324
+ "d0a1ebdf7fc0473f91c39b29ca580934",
325
+ "abbecdc637694e7cb026e003244e7037",
326
+ "7f814595d31e4b86992b5bd6bc85ced4",
327
+ "76548751bb9c4bcb9d4f39788ea7d4af",
328
+ "dbb88901f5084d49af208b91b52b6073"
329
+ ]
330
+ },
331
+ "id": "oQOeYl9OYiG5",
332
+ "outputId": "bb5b7dc4-ea7b-41ea-e741-2fb2bf66cccc"
333
+ },
334
+ "outputs": [
335
+ {
336
+ "data": {
337
+ "application/vnd.jupyter.widget-view+json": {
338
+ "model_id": "1b7ee0de15484cd5aecd6d8ca3b6ee9d",
339
+ "version_major": 2,
340
+ "version_minor": 0
341
+ },
342
+ "text/plain": [
343
+ "VBox(children=(HTML(value='<h2>Instructor Grading Configuration</h2>'), Textarea(value='', description='Learni…"
344
+ ]
345
+ },
346
+ "metadata": {},
347
+ "output_type": "display_data"
348
+ }
349
+ ],
350
+ "source": [
351
+ "InstructorGradingConfig().run_ui_container()"
352
+ ]
353
+ },
354
+ {
355
+ "cell_type": "markdown",
356
+ "metadata": {
357
+ "id": "W9SqmkpeIgpk"
358
+ },
359
+ "source": [
360
+ "You will need an OpenAI API key in order to access the chat functionality. In the following cell, you'll see a blank box pop up - copy your API key there and press enter."
361
+ ]
362
+ },
363
+ {
364
+ "cell_type": "code",
365
+ "execution_count": 5,
366
+ "metadata": {
367
+ "colab": {
368
+ "base_uri": "https://localhost:8080/",
369
+ "height": 32
370
+ },
371
+ "id": "MK8R5DmEYiG5",
372
+ "outputId": "09e11ee6-5a9f-4b61-ff61-ddf82a68c498"
373
+ },
374
+ "outputs": [
375
+ {
376
+ "data": {
377
+ "text/html": [
378
+ "\n",
379
+ " <style>\n",
380
+ " pre {\n",
381
+ " white-space: pre-wrap;\n",
382
+ " }\n",
383
+ " </style>\n",
384
+ " "
385
+ ],
386
+ "text/plain": [
387
+ "<IPython.core.display.HTML object>"
388
+ ]
389
+ },
390
+ "metadata": {},
391
+ "output_type": "display_data"
392
+ },
393
+ {
394
+ "name": "stdout",
395
+ "output_type": "stream",
396
+ "text": [
397
+ "··········\n"
398
+ ]
399
+ }
400
+ ],
401
+ "source": [
402
+ "# setup open AI api key\n",
403
+ "openai_api_key = getpass()"
404
+ ]
405
+ },
406
+ {
407
+ "cell_type": "markdown",
408
+ "metadata": {
409
+ "collapsed": true,
410
+ "id": "0bp158bj_0s6"
411
+ },
412
+ "source": [
413
+ "# Execute Grading\n",
414
+ "Run this cell set to have the generative AI assist you in grading."
415
+ ]
416
+ },
417
+ {
418
+ "cell_type": "markdown",
419
+ "metadata": {
420
+ "id": "vyJuQ7RUR8tB"
421
+ },
422
+ "source": [
423
+ "## Installation and Loading"
424
+ ]
425
+ },
426
+ {
427
+ "cell_type": "code",
428
+ "execution_count": 6,
429
+ "metadata": {
430
+ "colab": {
431
+ "base_uri": "https://localhost:8080/",
432
+ "height": 16
433
+ },
434
+ "id": "tjKxWLA3YiG5",
435
+ "outputId": "6dc85dff-4baa-44f0-edef-42925e6c271a"
436
+ },
437
+ "outputs": [
438
+ {
439
+ "data": {
440
+ "text/html": [
441
+ "\n",
442
+ " <style>\n",
443
+ " pre {\n",
444
+ " white-space: pre-wrap;\n",
445
+ " }\n",
446
+ " </style>\n",
447
+ " "
448
+ ],
449
+ "text/plain": [
450
+ "<IPython.core.display.HTML object>"
451
+ ]
452
+ },
453
+ "metadata": {},
454
+ "output_type": "display_data"
455
+ }
456
+ ],
457
+ "source": [
458
+ "%%capture\n",
459
+ "# install additional packages if needed\n",
460
+ "! pip install -q langchain openai"
461
+ ]
462
+ },
463
+ {
464
+ "cell_type": "code",
465
+ "execution_count": 7,
466
+ "metadata": {
467
+ "colab": {
468
+ "base_uri": "https://localhost:8080/",
469
+ "height": 16
470
+ },
471
+ "id": "S3oQiNm_YiG5",
472
+ "outputId": "95e43744-fadc-45cc-bb02-05737d12fcb2"
473
+ },
474
+ "outputs": [
475
+ {
476
+ "data": {
477
+ "text/html": [
478
+ "\n",
479
+ " <style>\n",
480
+ " pre {\n",
481
+ " white-space: pre-wrap;\n",
482
+ " }\n",
483
+ " </style>\n",
484
+ " "
485
+ ],
486
+ "text/plain": [
487
+ "<IPython.core.display.HTML object>"
488
+ ]
489
+ },
490
+ "metadata": {},
491
+ "output_type": "display_data"
492
+ }
493
+ ],
494
+ "source": [
495
+ "# import necessary libraries here\n",
496
+ "from langchain.llms import OpenAI\n",
497
+ "from langchain.chat_models import ChatOpenAI\n",
498
+ "from langchain.prompts import PromptTemplate\n",
499
+ "from langchain.document_loaders import TextLoader\n",
500
+ "from langchain.indexes import VectorstoreIndexCreator\n",
501
+ "from langchain.text_splitter import CharacterTextSplitter\n",
502
+ "from langchain.embeddings import OpenAIEmbeddings\n",
503
+ "from langchain.schema import SystemMessage, HumanMessage, AIMessage\n",
504
+ "import openai"
505
+ ]
506
+ },
507
+ {
508
+ "cell_type": "code",
509
+ "execution_count": 8,
510
+ "metadata": {
511
+ "colab": {
512
+ "base_uri": "https://localhost:8080/",
513
+ "height": 16
514
+ },
515
+ "id": "uXfSTQPrYiG5",
516
+ "outputId": "f85a6c4a-009f-4b30-f74a-04b6bfd85af6"
517
+ },
518
+ "outputs": [
519
+ {
520
+ "data": {
521
+ "text/html": [
522
+ "\n",
523
+ " <style>\n",
524
+ " pre {\n",
525
+ " white-space: pre-wrap;\n",
526
+ " }\n",
527
+ " </style>\n",
528
+ " "
529
+ ],
530
+ "text/plain": [
531
+ "<IPython.core.display.HTML object>"
532
+ ]
533
+ },
534
+ "metadata": {},
535
+ "output_type": "display_data"
536
+ }
537
+ ],
538
+ "source": [
539
+ "# Helper because lines are printed too long; helps with wrapping visualization\n",
540
+ "from IPython.display import HTML, display\n",
541
+ "\n",
542
+ "def set_css():\n",
543
+ " display(HTML('''\n",
544
+ " <style>\n",
545
+ " pre {\n",
546
+ " white-space: pre-wrap;\n",
547
+ " }\n",
548
+ " </style>\n",
549
+ " '''))\n",
550
+ "get_ipython().events.register('pre_run_cell', set_css)"
551
+ ]
552
+ },
553
+ {
554
+ "cell_type": "code",
555
+ "execution_count": 9,
556
+ "metadata": {
557
+ "colab": {
558
+ "base_uri": "https://localhost:8080/",
559
+ "height": 16
560
+ },
561
+ "id": "sTQFW9TxYiG5",
562
+ "outputId": "e291e167-e635-4006-965d-29fe1a0db10f"
563
+ },
564
+ "outputs": [
565
+ {
566
+ "data": {
567
+ "text/html": [
568
+ "\n",
569
+ " <style>\n",
570
+ " pre {\n",
571
+ " white-space: pre-wrap;\n",
572
+ " }\n",
573
+ " </style>\n",
574
+ " "
575
+ ],
576
+ "text/plain": [
577
+ "<IPython.core.display.HTML object>"
578
+ ]
579
+ },
580
+ "metadata": {},
581
+ "output_type": "display_data"
582
+ },
583
+ {
584
+ "data": {
585
+ "text/html": [
586
+ "\n",
587
+ " <style>\n",
588
+ " pre {\n",
589
+ " white-space: pre-wrap;\n",
590
+ " }\n",
591
+ " </style>\n",
592
+ " "
593
+ ],
594
+ "text/plain": [
595
+ "<IPython.core.display.HTML object>"
596
+ ]
597
+ },
598
+ "metadata": {},
599
+ "output_type": "display_data"
600
+ }
601
+ ],
602
+ "source": [
603
+ "# Set pandas display options\n",
604
+ "pd.set_option('display.max_columns', None)\n",
605
+ "pd.set_option('display.max_colwidth', 0)"
606
+ ]
607
+ },
608
+ {
609
+ "cell_type": "markdown",
610
+ "metadata": {
611
+ "id": "DOACT_LSSM58"
612
+ },
613
+ "source": [
614
+ "Setting of API key in environment and other settings"
615
+ ]
616
+ },
617
+ {
618
+ "cell_type": "code",
619
+ "execution_count": 10,
620
+ "metadata": {
621
+ "colab": {
622
+ "base_uri": "https://localhost:8080/",
623
+ "height": 16
624
+ },
625
+ "id": "OV05xRtDYiG5",
626
+ "outputId": "0d6339d9-bc32-49e9-955f-99947b510456"
627
+ },
628
+ "outputs": [
629
+ {
630
+ "data": {
631
+ "text/html": [
632
+ "\n",
633
+ " <style>\n",
634
+ " pre {\n",
635
+ " white-space: pre-wrap;\n",
636
+ " }\n",
637
+ " </style>\n",
638
+ " "
639
+ ],
640
+ "text/plain": [
641
+ "<IPython.core.display.HTML object>"
642
+ ]
643
+ },
644
+ "metadata": {},
645
+ "output_type": "display_data"
646
+ },
647
+ {
648
+ "data": {
649
+ "text/html": [
650
+ "\n",
651
+ " <style>\n",
652
+ " pre {\n",
653
+ " white-space: pre-wrap;\n",
654
+ " }\n",
655
+ " </style>\n",
656
+ " "
657
+ ],
658
+ "text/plain": [
659
+ "<IPython.core.display.HTML object>"
660
+ ]
661
+ },
662
+ "metadata": {},
663
+ "output_type": "display_data"
664
+ }
665
+ ],
666
+ "source": [
667
+ "#extract info from dictionary\n",
668
+ "json_file_path = grade_settings['json_file_path']\n",
669
+ "learning_objectives = grade_settings['learning_objectives']\n",
670
+ "\n",
671
+ "#set API key\n",
672
+ "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n",
673
+ "openai.api_key = openai_api_key"
674
+ ]
675
+ },
676
+ {
677
+ "cell_type": "markdown",
678
+ "metadata": {
679
+ "id": "YreIs-I-tuxx"
680
+ },
681
+ "source": [
682
+ "Initiate the OpenAI model using Langchain."
683
+ ]
684
+ },
685
+ {
686
+ "cell_type": "code",
687
+ "execution_count": 11,
688
+ "metadata": {
689
+ "colab": {
690
+ "base_uri": "https://localhost:8080/",
691
+ "height": 16
692
+ },
693
+ "id": "ZRn9wbJBYiG5",
694
+ "outputId": "c09c7a7c-1ca0-4860-b39d-91778f183307"
695
+ },
696
+ "outputs": [
697
+ {
698
+ "data": {
699
+ "text/html": [
700
+ "\n",
701
+ " <style>\n",
702
+ " pre {\n",
703
+ " white-space: pre-wrap;\n",
704
+ " }\n",
705
+ " </style>\n",
706
+ " "
707
+ ],
708
+ "text/plain": [
709
+ "<IPython.core.display.HTML object>"
710
+ ]
711
+ },
712
+ "metadata": {},
713
+ "output_type": "display_data"
714
+ },
715
+ {
716
+ "data": {
717
+ "text/html": [
718
+ "\n",
719
+ " <style>\n",
720
+ " pre {\n",
721
+ " white-space: pre-wrap;\n",
722
+ " }\n",
723
+ " </style>\n",
724
+ " "
725
+ ],
726
+ "text/plain": [
727
+ "<IPython.core.display.HTML object>"
728
+ ]
729
+ },
730
+ "metadata": {},
731
+ "output_type": "display_data"
732
+ }
733
+ ],
734
+ "source": [
735
+ "llm = ChatOpenAI(model='gpt-3.5-turbo-16k')\n",
736
+ "messages = [\n",
737
+ " SystemMessage(content=\"You are a helpful assistant.\"),\n",
738
+ " HumanMessage(content=\"\")\n",
739
+ "]"
740
+ ]
741
+ },
742
+ {
743
+ "cell_type": "markdown",
744
+ "metadata": {
745
+ "id": "pIKYtr0UTJNc"
746
+ },
747
+ "source": [
748
+ "## Functions to help with loading json"
749
+ ]
750
+ },
751
+ {
752
+ "cell_type": "markdown",
753
+ "metadata": {
754
+ "id": "t7O3XPC29Osw"
755
+ },
756
+ "source": [
757
+ "`file_upload_json_to_df` helps when you use the file uploader as the json is directly read in this case. `clean_keys` helps when there are errors on the keys when reading."
758
+ ]
759
+ },
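+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick, purely hypothetical sketch (the sample entry below is not from a real submission), `clean_keys` (defined in the next cell) only normalizes the dictionary keys by stripping stray leading/trailing whitespace and newlines; the values are left untouched:\n",
+ "```python\n",
+ "# Hypothetical chat-log entry whose keys picked up stray whitespace on export\n",
+ "messy = [{' timestamp ': '2023-01-01 10:00', ' author': 'user', 'message ': 'My answer ...'}]\n",
+ "\n",
+ "cleaned = clean_keys(messy)\n",
+ "# list(cleaned[0].keys()) -> ['timestamp', 'author', 'message']\n",
+ "```"
+ ]
+ },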
760
+ {
761
+ "cell_type": "code",
762
+ "execution_count": 12,
763
+ "metadata": {
764
+ "colab": {
765
+ "base_uri": "https://localhost:8080/",
766
+ "height": 16
767
+ },
768
+ "id": "qGxPHexrYiG5",
769
+ "outputId": "80657bb1-97e8-423a-a2ff-99afe8d22718"
770
+ },
771
+ "outputs": [
772
+ {
773
+ "data": {
774
+ "text/html": [
775
+ "\n",
776
+ " <style>\n",
777
+ " pre {\n",
778
+ " white-space: pre-wrap;\n",
779
+ " }\n",
780
+ " </style>\n",
781
+ " "
782
+ ],
783
+ "text/plain": [
784
+ "<IPython.core.display.HTML object>"
785
+ ]
786
+ },
787
+ "metadata": {},
788
+ "output_type": "display_data"
789
+ },
790
+ {
791
+ "data": {
792
+ "text/html": [
793
+ "\n",
794
+ " <style>\n",
795
+ " pre {\n",
796
+ " white-space: pre-wrap;\n",
797
+ " }\n",
798
+ " </style>\n",
799
+ " "
800
+ ],
801
+ "text/plain": [
802
+ "<IPython.core.display.HTML object>"
803
+ ]
804
+ },
805
+ "metadata": {},
806
+ "output_type": "display_data"
807
+ }
808
+ ],
809
+ "source": [
810
+ "# Strip beginning and ending newlines\n",
811
+ "def clean_keys(loaded_json):\n",
812
+ " out_json = [{key.strip():value for key, value in json_dict.items()} for json_dict in loaded_json ]\n",
813
+ " return out_json\n",
814
+ "\n",
815
+ "# Convert difficult datatypes to newlines\n",
816
+ "def file_upload_json_to_df(upload_json):\n",
817
+ "\n",
818
+ " #get middle key of json to extract content\n",
819
+ " fname = list(upload_json.keys())[0]\n",
820
+ "\n",
821
+ " #load the json; strict allows us to get around encoding issues\n",
822
+ " loaded_json = json.loads(upload_json[fname]['content'], strict=False)\n",
823
+ "\n",
824
+ " #clean the keys if needed\n",
825
+ " loaded_json = clean_keys(loaded_json)\n",
826
+ "\n",
827
+ " return pd.DataFrame(loaded_json)"
828
+ ]
829
+ },
830
+ {
831
+ "cell_type": "markdown",
832
+ "metadata": {
833
+ "id": "N2yuYFQJYiG6"
834
+ },
835
+ "source": [
836
+ "`create_user_dataframe` filters based on role to create a dataframe for only user responses"
837
+ ]
838
+ },
839
+ {
840
+ "cell_type": "code",
841
+ "execution_count": 13,
842
+ "metadata": {
843
+ "colab": {
844
+ "base_uri": "https://localhost:8080/",
845
+ "height": 17
846
+ },
847
+ "id": "58hygjTXYiG6",
848
+ "outputId": "8f3683fb-f3da-45f3-8338-772e7583d4cc"
849
+ },
850
+ "outputs": [
851
+ {
852
+ "data": {
853
+ "text/html": [
854
+ "\n",
855
+ " <style>\n",
856
+ " pre {\n",
857
+ " white-space: pre-wrap;\n",
858
+ " }\n",
859
+ " </style>\n",
860
+ " "
861
+ ],
862
+ "text/plain": [
863
+ "<IPython.core.display.HTML object>"
864
+ ]
865
+ },
866
+ "metadata": {},
867
+ "output_type": "display_data"
868
+ }
869
+ ],
870
+ "source": [
871
+ "def create_user_dataframe(df):\n",
872
+ " df_user = df.query(\"`author` == 'user'\")\n",
873
+ "\n",
874
+ " return df_user"
875
+ ]
876
+ },
877
+ {
878
+ "cell_type": "markdown",
879
+ "metadata": {
880
+ "id": "MOwaLI97Igpm"
881
+ },
882
+ "source": [
883
+ "`load_json_as_df` helps when you use the file uploader as the json is directly read in this case. It accepts the path to the JSON to load the dataframe based on the json."
884
+ ]
885
+ },
886
+ {
887
+ "cell_type": "code",
888
+ "execution_count": 131,
889
+ "metadata": {
890
+ "colab": {
891
+ "base_uri": "https://localhost:8080/",
892
+ "height": 16
893
+ },
894
+ "id": "w0xN9CJeYiG6",
895
+ "outputId": "9422452f-9a97-4f22-9e49-ea0812c298fd"
896
+ },
897
+ "outputs": [
898
+ {
899
+ "data": {
900
+ "text/html": [
901
+ "\n",
902
+ " <style>\n",
903
+ " pre {\n",
904
+ " white-space: pre-wrap;\n",
905
+ " }\n",
906
+ " </style>\n",
907
+ " "
908
+ ],
909
+ "text/plain": [
910
+ "<IPython.core.display.HTML object>"
911
+ ]
912
+ },
913
+ "metadata": {},
914
+ "output_type": "display_data"
915
+ },
916
+ {
917
+ "data": {
918
+ "text/html": [
919
+ "\n",
920
+ " <style>\n",
921
+ " pre {\n",
922
+ " white-space: pre-wrap;\n",
923
+ " }\n",
924
+ " </style>\n",
925
+ " "
926
+ ],
927
+ "text/plain": [
928
+ "<IPython.core.display.HTML object>"
929
+ ]
930
+ },
931
+ "metadata": {},
932
+ "output_type": "display_data"
933
+ }
934
+ ],
935
+ "source": [
936
+ "def load_json_as_df(fpath):\n",
937
+ " # check if file is .json\n",
938
+ " if not fpath.endswith('.json'):\n",
939
+ " return None\n",
940
+ "\n",
941
+ " keys = [\"timestamp\", \"author\", \"message\"]\n",
942
+ "\n",
943
+ " df_out = None\n",
944
+ " out_error = None\n",
945
+ "\n",
946
+ " try:\n",
947
+ " # Read JSON file\n",
948
+ " with open(fpath, \"r\") as f:\n",
949
+ " json_data = f.read()\n",
950
+ "\n",
951
+ " # Load JSON data\n",
952
+ " data = json.loads(json_data, strict=False)\n",
953
+ "\n",
954
+ " # Quick check to see if we can fix common errors in json\n",
955
+ " # 1. JSON responses wrapped in enclosing dictionary\n",
956
+ " if isinstance(data, dict):\n",
957
+ " if len(data.keys()) == 1:\n",
958
+ " data = data[list(data.keys())[0]]\n",
959
+ " else:\n",
960
+ " data = [data] # convert to list otherwise\n",
961
+ "\n",
962
+ " # We only operate on lists of dictionaries\n",
963
+ " if isinstance(data, list):\n",
964
+ " data = clean_keys(data) # clean keys to make sure there are no unnecessary newlines\n",
965
+ "\n",
966
+ " if all(all(k in d for k in keys) for d in data):\n",
967
+ " # Filter only the student messages based on the \"author\" key\n",
968
+ " data = [d for d in data if d[\"author\"].lower() == \"user\"]\n",
969
+ "\n",
970
+ " df_out = pd.json_normalize(data)\n",
971
+ " if len(df_out) <= 1:\n",
972
+ " out_error = [fpath, \"Warning: JSON keys correct, but something wrong with the overall structure of the JSON when converting to the dataframe. The dataframe only has one row. Skipping.\"]\n",
973
+ " df_out = None\n",
974
+ " else:\n",
975
+ " out_error = [fpath, \"Error: JSON Keys are incorrect. Found keys: \" + str(list(data[0].keys()))]\n",
976
+ " else:\n",
977
+ " out_error = [fpath, \"Error: Something is wrong with the structure of the JSON.\"]\n",
978
+ "\n",
979
+ " except Exception as e:\n",
980
+ " print(f\"Error processing file {fpath}: {str(e)}\")\n",
981
+ " out_error = [fpath, \"Fatal System Error: \" + str(e)]\n",
982
+ "\n",
983
+ " if df_out is not None:\n",
984
+ " df_out['filename'] = fpath\n",
985
+ "\n",
986
+ " return df_out, out_error"
987
+ ]
988
+ },
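+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For reference, `load_json_as_df` expects the submission file to contain a list of message dictionaries with `timestamp`, `author`, and `message` keys, and it keeps only the entries whose author is `user`. A minimal, hypothetical example (the file path and contents below are illustrative only):\n",
+ "```python\n",
+ "# Hypothetical contents of a student submission, e.g. submissions/doe_jane.json:\n",
+ "# [\n",
+ "#   {\"timestamp\": \"2023-01-01 10:00\", \"author\": \"assistant\", \"message\": \"Question 1 ...\"},\n",
+ "#   {\"timestamp\": \"2023-01-01 10:02\", \"author\": \"user\", \"message\": \"My answer is ...\"},\n",
+ "#   {\"timestamp\": \"2023-01-01 10:03\", \"author\": \"assistant\", \"message\": \"Question 2 ...\"},\n",
+ "#   {\"timestamp\": \"2023-01-01 10:05\", \"author\": \"user\", \"message\": \"I think ...\"}\n",
+ "# ]\n",
+ "\n",
+ "df, err = load_json_as_df('submissions/doe_jane.json')  # illustrative path\n",
+ "if err is not None:\n",
+ "    print(err[1])  # human-readable description of the loading problem\n",
+ "else:\n",
+ "    print(df[['author', 'message']])\n",
+ "```"
+ ]
+ },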
989
1054
+ {
1055
+ "cell_type": "markdown",
1056
+ "metadata": {
1057
+ "id": "KA5moX-1Igpn"
1058
+ },
1059
+ "source": [
1060
+ "The `process_file` and `process_files` functions provide the implementation of prompt templates for instructor grading. It uses the input components to assemble a prompt and then sends this prompt to the llm for evaluation alongside the read dataframes."
1061
+ ]
1062
+ },
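+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To make the assembly concrete, here is an illustrative sketch (the values are hypothetical) of how `process_file` concatenates the pieces into the final prompt before sending it to the model:\n",
+ "```python\n",
+ "# Illustrative only -- process_file performs this concatenation internally\n",
+ "desc = 'Create a table of questions and answers. '\n",
+ "instr = 'Award a point only for answers that were correct on the first attempt. '\n",
+ "chat_log = 'Question 1 ...\\nMy answer is ...'  # the student messages joined with newlines\n",
+ "\n",
+ "prompt = desc + instr + '\\n' + 'Here is the chat log: \\n\\n' + chat_log + '\\n'\n",
+ "```"
+ ]
+ },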
1063
+ {
1064
+ "cell_type": "code",
1065
+ "execution_count": 15,
1066
+ "metadata": {
1067
+ "colab": {
1068
+ "base_uri": "https://localhost:8080/",
1069
+ "height": 16
1070
+ },
1071
+ "id": "nFz3UVL3YiG6",
1072
+ "outputId": "01029e5f-b313-4941-d572-6dca5903d4ac"
1073
+ },
1074
+ "outputs": [
1075
+ {
1076
+ "data": {
1077
+ "text/html": [
1078
+ "\n",
1079
+ " <style>\n",
1080
+ " pre {\n",
1081
+ " white-space: pre-wrap;\n",
1082
+ " }\n",
1083
+ " </style>\n",
1084
+ " "
1085
+ ],
1086
+ "text/plain": [
1087
+ "<IPython.core.display.HTML object>"
1088
+ ]
1089
+ },
1090
+ "metadata": {},
1091
+ "output_type": "display_data"
1092
+ },
1093
+ {
1094
+ "data": {
1095
+ "text/html": [
1096
+ "\n",
1097
+ " <style>\n",
1098
+ " pre {\n",
1099
+ " white-space: pre-wrap;\n",
1100
+ " }\n",
1101
+ " </style>\n",
1102
+ " "
1103
+ ],
1104
+ "text/plain": [
1105
+ "<IPython.core.display.HTML object>"
1106
+ ]
1107
+ },
1108
+ "metadata": {},
1109
+ "output_type": "display_data"
1110
+ }
1111
+ ],
1112
+ "source": [
1113
+ "def process_file(df, desc, instr, print_results):\n",
1114
+ " messages_as_string = '\\n'.join(df['message'].astype(str))\n",
1115
+ " context = messages_as_string\n",
1116
+ "\n",
1117
+ " # Assemble prompt\n",
1118
+ " prompt = desc if desc is not None else \"\"\n",
1119
+ " prompt = (prompt + instr + \"\\n\") if instr is not None else prompt\n",
1120
+ " prompt = prompt + \"Here is the chat log: \\n\\n\" + context + \"\\n\"\n",
1121
+ "\n",
1122
+ " # Get results and optionally print\n",
1123
+ " messages[1] = HumanMessage(content=prompt)\n",
1124
+ " result = llm(messages)\n",
1125
+ "\n",
1126
+ " # Check if 'filename' exists in df\n",
1127
+ " if 'filename' in df:\n",
1128
+ " if print_results:\n",
1129
+ " print(f\"\\n\\nResult for file {df['filename'][0]}: \\n{result.content}\")\n",
1130
+ " else:\n",
1131
+ " if print_results:\n",
1132
+ " print(f\"\\n\\nResult for file: Unknown Filename \\n{result.content}\")\n",
1133
+ "\n",
1134
+ " return result\n",
1135
+ "\n",
1136
+ "def process_files(json_dfs, output_desc=None, grad_instructions=None, use_defaults = False, print_results=True):\n",
1137
+ " if use_defaults:\n",
1138
+ " output_desc = (\"Given the following chat log, create a table with the question number, the question content, answer, \"\n",
1139
+ " \"whether or not the student answered correctly on the first try, and the number of attempts it took to get the right answer. \")\n",
1140
+ " grad_instructions = (\"Then, calculate the quiz grade from the total number of assessment questions. \"\n",
1141
+ " \"Importantly, a point should only be granted if an answer was correct on the very first attempt. \"\n",
1142
+ " \"If an answer was not correct on the first attempt, even if it was correct in subsequent attempts, no point should be awarded for that question. \")\n",
1143
+ "\n",
1144
+ " results = [process_file(df, output_desc, grad_instructions, print_results) for df in json_dfs]\n",
1145
+ "\n",
1146
+ " return results"
1147
+ ]
1148
+ },
1149
+ {
1150
+ "cell_type": "code",
1151
+ "execution_count": 16,
1152
+ "metadata": {
1153
+ "colab": {
1154
+ "base_uri": "https://localhost:8080/",
1155
+ "height": 17
1156
+ },
1157
+ "id": "EhryP8utrR9D",
1158
+ "outputId": "51f4a60d-d6b7-4885-85c4-410c741ed651"
1159
+ },
1160
+ "outputs": [
1161
+ {
1162
+ "data": {
1163
+ "text/html": [
1164
+ "\n",
1165
+ " <style>\n",
1166
+ " pre {\n",
1167
+ " white-space: pre-wrap;\n",
1168
+ " }\n",
1169
+ " </style>\n",
1170
+ " "
1171
+ ],
1172
+ "text/plain": [
1173
+ "<IPython.core.display.HTML object>"
1174
+ ]
1175
+ },
1176
+ "metadata": {},
1177
+ "output_type": "display_data"
1178
+ }
1179
+ ],
1180
+ "source": [
1181
+ "def output_log_file(df_list, results_list, log_file='evaluation_log.txt'):\n",
1182
+ " \"\"\"\n",
1183
+ " Create a single log file containing evaluation results for all students.\n",
1184
+ "\n",
1185
+ " Parameters:\n",
1186
+ " df_list (list of pandas.DataFrame): List of DataFrames.\n",
1187
+ " results_list (list of ai_model_response): List of evaluation results.\n",
1188
+ " log_file (str): File name where the evaluation log will be saved. Default is 'evaluation_log.txt'.\n",
1189
+ "\n",
1190
+ " Returns:\n",
1191
+ " None\n",
1192
+ " \"\"\"\n",
1193
+ " with open(log_file, 'w') as log:\n",
1194
+ " for df, result in zip(df_list, results_list):\n",
1195
+ " log.write(f\"File: {df['filename'][0]}\\n\")\n",
1196
+ " log.write(result.content)\n",
1197
+ " log.write(\"\\n\\n\")"
1198
+ ]
1199
+ },
1200
+ {
1201
+ "cell_type": "markdown",
1202
+ "metadata": {
1203
+ "id": "lXQ45cJ1AztR"
1204
+ },
1205
+ "source": [
1206
+ "`pretty_print` makes dataframes look better when printed by substituting non-HTML with HTML for rendering."
1207
+ ]
1208
+ },
1209
+ {
1210
+ "cell_type": "code",
1211
+ "execution_count": 134,
1212
+ "metadata": {
1213
+ "colab": {
1214
+ "base_uri": "https://localhost:8080/",
1215
+ "height": 16
1216
+ },
1217
+ "id": "0te_RLOOYiG6",
1218
+ "outputId": "1dc53c98-5f68-4902-b377-7bba451395f0"
1219
+ },
1220
+ "outputs": [
1221
+ {
1222
+ "data": {
1223
+ "text/html": [
1224
+ "\n",
1225
+ " <style>\n",
1226
+ " pre {\n",
1227
+ " white-space: pre-wrap;\n",
1228
+ " }\n",
1229
+ " </style>\n",
1230
+ " "
1231
+ ],
1232
+ "text/plain": [
1233
+ "<IPython.core.display.HTML object>"
1234
+ ]
1235
+ },
1236
+ "metadata": {},
1237
+ "output_type": "display_data"
1238
+ },
1239
+ {
1240
+ "data": {
1241
+ "text/html": [
1242
+ "\n",
1243
+ " <style>\n",
1244
+ " pre {\n",
1245
+ " white-space: pre-wrap;\n",
1246
+ " }\n",
1247
+ " </style>\n",
1248
+ " "
1249
+ ],
1250
+ "text/plain": [
1251
+ "<IPython.core.display.HTML object>"
1252
+ ]
1253
+ },
1254
+ "metadata": {},
1255
+ "output_type": "display_data"
1256
+ }
1257
+ ],
1258
+ "source": [
1259
+ "def pretty_print(df):\n",
1260
+ " return display( HTML( df.to_html().replace(\"\\\\n\",\"<br>\") ) )"
1261
+ ]
1262
+ },
1263
+ {
1264
+ "cell_type": "markdown",
1265
+ "metadata": {
1266
+ "id": "I3rKk7lJYiG6"
1267
+ },
1268
+ "source": [
1269
+ "`save_as_csv` saves the dataframe as a CSV"
1270
+ ]
1271
+ },
1272
+ {
1273
+ "cell_type": "code",
1274
+ "execution_count": 135,
1275
+ "metadata": {
1276
+ "colab": {
1277
+ "base_uri": "https://localhost:8080/",
1278
+ "height": 16
1279
+ },
1280
+ "id": "DnrH2ldeYiG6",
1281
+ "outputId": "f1f6153b-49db-4145-c188-373685ffdcf4"
1282
+ },
1283
+ "outputs": [
1284
+ {
1285
+ "data": {
1286
+ "text/html": [
1287
+ "\n",
1288
+ " <style>\n",
1289
+ " pre {\n",
1290
+ " white-space: pre-wrap;\n",
1291
+ " }\n",
1292
+ " </style>\n",
1293
+ " "
1294
+ ],
1295
+ "text/plain": [
1296
+ "<IPython.core.display.HTML object>"
1297
+ ]
1298
+ },
1299
+ "metadata": {},
1300
+ "output_type": "display_data"
1301
+ },
1302
+ {
1303
+ "data": {
1304
+ "text/html": [
1305
+ "\n",
1306
+ " <style>\n",
1307
+ " pre {\n",
1308
+ " white-space: pre-wrap;\n",
1309
+ " }\n",
1310
+ " </style>\n",
1311
+ " "
1312
+ ],
1313
+ "text/plain": [
1314
+ "<IPython.core.display.HTML object>"
1315
+ ]
1316
+ },
1317
+ "metadata": {},
1318
+ "output_type": "display_data"
1319
+ }
1320
+ ],
1321
+ "source": [
1322
+ "def save_as_csv(df, file_name):\n",
1323
+ " df.to_csv(file_name, index=False)"
1324
+ ]
1325
+ },
1326
+ {
1327
+ "cell_type": "code",
1328
+ "execution_count": 136,
1329
+ "metadata": {
1330
+ "colab": {
1331
+ "base_uri": "https://localhost:8080/",
1332
+ "height": 16
1333
+ },
1334
+ "id": "Vgo_y8R8bzTE",
1335
+ "outputId": "f5effec2-8620-4c1d-be15-672b8cb3de21"
1336
+ },
1337
+ "outputs": [
1338
+ {
1339
+ "data": {
1340
+ "text/html": [
1341
+ "\n",
1342
+ " <style>\n",
1343
+ " pre {\n",
1344
+ " white-space: pre-wrap;\n",
1345
+ " }\n",
1346
+ " </style>\n",
1347
+ " "
1348
+ ],
1349
+ "text/plain": [
1350
+ "<IPython.core.display.HTML object>"
1351
+ ]
1352
+ },
1353
+ "metadata": {},
1354
+ "output_type": "display_data"
1355
+ },
1356
+ {
1357
+ "data": {
1358
+ "text/html": [
1359
+ "\n",
1360
+ " <style>\n",
1361
+ " pre {\n",
1362
+ " white-space: pre-wrap;\n",
1363
+ " }\n",
1364
+ " </style>\n",
1365
+ " "
1366
+ ],
1367
+ "text/plain": [
1368
+ "<IPython.core.display.HTML object>"
1369
+ ]
1370
+ },
1371
+ "metadata": {},
1372
+ "output_type": "display_data"
1373
+ }
1374
+ ],
1375
+ "source": [
1376
+ "def show_json_loading_errors(err_list):\n",
1377
+ " if err_list:\n",
1378
+ " print(\"The following files have the following errors upon loading and will NOT be processed:\", '\\n'.join(err_list))\n",
1379
+ " else:\n",
1380
+ " print(\"No errors found in uploaded zip JSON files.\")\n"
1381
+ ]
1382
+ },
1383
+ {
1384
+ "cell_type": "markdown",
1385
+ "metadata": {
1386
+ "id": "85h5oTysJkHs"
1387
+ },
1388
+ "source": [
1389
+ "## Final data preparation steps"
1390
+ ]
1391
+ },
1392
+ {
1393
+ "cell_type": "code",
1394
+ "execution_count": 137,
1395
+ "metadata": {
1396
+ "colab": {
1397
+ "base_uri": "https://localhost:8080/",
1398
+ "height": 16
1399
+ },
1400
+ "id": "Upah5_ygZRZx",
1401
+ "outputId": "639a0d70-6a93-4462-f65a-2e24f30643c0"
1402
+ },
1403
+ "outputs": [
1404
+ {
1405
+ "data": {
1406
+ "text/html": [
1407
+ "\n",
1408
+ " <style>\n",
1409
+ " pre {\n",
1410
+ " white-space: pre-wrap;\n",
1411
+ " }\n",
1412
+ " </style>\n",
1413
+ " "
1414
+ ],
1415
+ "text/plain": [
1416
+ "<IPython.core.display.HTML object>"
1417
+ ]
1418
+ },
1419
+ "metadata": {},
1420
+ "output_type": "display_data"
1421
+ },
1422
+ {
1423
+ "data": {
1424
+ "text/html": [
1425
+ "\n",
1426
+ " <style>\n",
1427
+ " pre {\n",
1428
+ " white-space: pre-wrap;\n",
1429
+ " }\n",
1430
+ " </style>\n",
1431
+ " "
1432
+ ],
1433
+ "text/plain": [
1434
+ "<IPython.core.display.HTML object>"
1435
+ ]
1436
+ },
1437
+ "metadata": {},
1438
+ "output_type": "display_data"
1439
+ }
1440
+ ],
1441
+ "source": [
1442
+ "#additional processing setup\n",
1443
+ "json_files = grade_settings['json_files']\n",
1444
+ "load_responses = [load_json_as_df(jf) for jf in json_files]\n",
1445
+ "\n",
1446
+ "#unzip to two separate lists\n",
1447
+ "all_json_dfs, errors_list = zip(*load_responses)\n",
1448
+ "\n",
1449
+ "# Remove failed JSONs\n",
1450
+ "all_json_dfs = [df for df in all_json_dfs if df is not None]\n",
1451
+ "\n",
1452
+ "# Update errors list to be individual strings\n",
1453
+ "errors_list = [' '.join(err) for err in errors_list if err is not None]"
1454
+ ]
1455
+ },
1456
+ {
1457
+ "cell_type": "markdown",
1458
+ "metadata": {
1459
+ "id": "P_H4uIfmAsr0"
1460
+ },
1461
+ "source": [
1462
+ "# AI-Assisted Evaluation\n",
1463
+ "Introduction and Instructions\n",
1464
+ "--------------------------------------------------\n",
1465
+ "The following example illustrates how you can specify important components of the prompts for sending to the llm. The `process_files` function will iterate over all of the submissions in your zip file, create dataframes of results (via instruction by setting `output_setup`), and also perform evaluation based on your instructions (via instruction by setting `grading_instructions`).\n",
1466
+ "\n",
1467
+ "Example functionality is demonstrated below."
1468
+ ]
1469
+ },
1470
+ {
1471
+ "cell_type": "code",
1472
+ "execution_count": 138,
1473
+ "metadata": {
1474
+ "colab": {
1475
+ "base_uri": "https://localhost:8080/",
1476
+ "height": 35
1477
+ },
1478
+ "id": "9zIPjG5lco3Z",
1479
+ "outputId": "cc9531c0-3939-4c9f-dc6c-9d2cfa0ea7d2"
1480
+ },
1481
+ "outputs": [
1482
+ {
1483
+ "data": {
1484
+ "text/html": [
1485
+ "\n",
1486
+ " <style>\n",
1487
+ " pre {\n",
1488
+ " white-space: pre-wrap;\n",
1489
+ " }\n",
1490
+ " </style>\n",
1491
+ " "
1492
+ ],
1493
+ "text/plain": [
1494
+ "<IPython.core.display.HTML object>"
1495
+ ]
1496
+ },
1497
+ "metadata": {},
1498
+ "output_type": "display_data"
1499
+ },
1500
+ {
1501
+ "data": {
1502
+ "text/html": [
1503
+ "\n",
1504
+ " <style>\n",
1505
+ " pre {\n",
1506
+ " white-space: pre-wrap;\n",
1507
+ " }\n",
1508
+ " </style>\n",
1509
+ " "
1510
+ ],
1511
+ "text/plain": [
1512
+ "<IPython.core.display.HTML object>"
1513
+ ]
1514
+ },
1515
+ "metadata": {},
1516
+ "output_type": "display_data"
1517
+ },
1518
+ {
1519
+ "name": "stdout",
1520
+ "output_type": "stream",
1521
+ "text": [
1522
+ "No errors found in uploaded zip JSON files.\n"
1523
+ ]
1524
+ }
1525
+ ],
1526
+ "source": [
1527
+ "# Print list of files with the incorrect format\n",
1528
+ "show_json_loading_errors(errors_list)"
1529
+ ]
1530
+ },
1531
+ {
1532
+ "cell_type": "code",
1533
+ "execution_count": 139,
1534
+ "metadata": {
1535
+ "colab": {
1536
+ "base_uri": "https://localhost:8080/",
1537
+ "height": 1000
1538
+ },
1539
+ "id": "utPzYUoKYiG9",
1540
+ "outputId": "eb3e1769-eb7a-4c93-96bc-6c998df55ef1"
1541
+ },
1542
+ "outputs": [
1543
+ {
1544
+ "data": {
1545
+ "text/html": [
1546
+ "\n",
1547
+ " <style>\n",
1548
+ " pre {\n",
1549
+ " white-space: pre-wrap;\n",
1550
+ " }\n",
1551
+ " </style>\n",
1552
+ " "
1553
+ ],
1554
+ "text/plain": [
1555
+ "<IPython.core.display.HTML object>"
1556
+ ]
1557
+ },
1558
+ "metadata": {},
1559
+ "output_type": "display_data"
1560
+ },
1561
+ {
1562
+ "data": {
1563
+ "text/html": [
1564
+ "\n",
1565
+ " <style>\n",
1566
+ " pre {\n",
1567
+ " white-space: pre-wrap;\n",
1568
+ " }\n",
1569
+ " </style>\n",
1570
+ " "
1571
+ ],
1572
+ "text/plain": [
1573
+ "<IPython.core.display.HTML object>"
1574
+ ]
1575
+ },
1576
+ "metadata": {},
1577
+ "output_type": "display_data"
1578
+ },
1579
+ {
1580
+ "name": "stdout",
1581
+ "output_type": "stream",
1582
+ "text": [
1583
+ "\n",
1584
+ "\n",
1585
+ "Result for file instructorTest/spencer-smith_jesse.json: \n",
1586
+ "Summary and feedback for student responses:\n",
1587
+ "\n",
1588
+ "Student 1:\n",
1589
+ "The student provided an excellent response to Question 1. They accurately explained the purpose of capitalizing expenses when incorporating them into the estimate of corporate earnings. They highlighted the importance of accurately reflecting the timing of costs and their related benefits, and how capitalizing expenses can impact a company's financial statements. The student also mentioned the matching principle of accounting and its role in ensuring the comparability and fairness of financial statements. Overall, the response is comprehensive and well-written. Well done!\n",
1590
+ "\n",
1591
+ "Student 2:\n",
1592
+ "The student gave a great response to Question 2. They correctly stated that expenses should be capitalized when they provide value beyond the current accounting period. The student also provided examples of capital expenses, such as the purchase price of a delivery truck or the cost of a building renovation. These examples demonstrate a clear understanding of the topic. The response is well-explained and shows a good grasp of the concept. Great job!\n",
1593
+ "\n",
1594
+ "Numeric summary:\n",
1595
+ "Both students provided correct answers to their respective questions, earning them a point each. Therefore, the numeric summary is as follows:\n",
1596
+ "Student 1: 1 point\n",
1597
+ "Student 2: 1 point\n",
1598
+ "\n",
1599
+ "\n",
1600
+ "Result for file instructorTest/bell_charreau.json: \n",
1601
+ "Summary:\n",
1602
+ "- The first student's response inaccurately states that capitalizing expenses is done to make the money 'look good' on the earnings report. The assistant provides a detailed explanation of the correct purpose of capitalizing expenses.\n",
1603
+ "- The second student's response partially identifies that capitalized expenses provide benefits for a longer period, but the assistant provides a more comprehensive explanation of what types of expenses should be capitalized and why they are treated differently from regular expenses.\n",
1604
+ "\n",
1605
+ "Feedback for the first student:\n",
1606
+ "The student accurately identified the purpose of capitalizing expenses, but their explanation was not entirely correct. They incorrectly stated that it is done to make the money 'look good' on the earnings report. The assistant provided a clear and detailed explanation of the correct purpose of capitalizing expenses and how it aligns with accounting principles.\n",
1607
+ "\n",
1608
+ "Feedback for the second student:\n",
1609
+ "The student partially identified the types of expenses that should be capitalized and why they are treated differently from regular expenses. However, their explanation was not comprehensive. The assistant provided a more detailed explanation of what types of expenses should be capitalized and why, as well as the concept of depreciation or amortization for spreading out the costs over time.\n",
1610
+ "\n",
1611
+ "Numeric Summary:\n",
1612
+ "The first student's response was partially correct, so they receive 0.5 points.\n",
1613
+ "The second student's response was also partially correct, so they receive 0.5 points.\n",
1614
+ "The total point count is 1.\n"
1615
+ ]
1616
+ }
1617
+ ],
1618
+ "source": [
1619
+ "# Example\n",
1620
+ "output_setup = (\"For each student response given in the following chat log, please generate a summary and detailed feedback for each students' responses,\"\n",
1621
+ " \", including what the student did well, and what was done poorly. \"\n",
1622
+ " \"Additionally, please filter feedback alphabetically by the name of the student from the filename.\")\n",
1623
+ "grading_instructions = (\"Then, calculate a numeric summary, summing up the point totals, \"\n",
1624
+ " \"in which a point is awarded for answering correctly. \")\n",
1625
+ "\n",
1626
+ "# Assuming `file_paths` is a list of file paths.\n",
1627
+ "processed_submissions = process_files(all_json_dfs, output_setup, grading_instructions, use_defaults=False, print_results=True)\n",
1628
+ "\n",
1629
+ "output_log_file(all_json_dfs, processed_submissions)"
1630
+ ]
1631
+ },
1632
+ {
1633
+ "cell_type": "markdown",
1634
+ "metadata": {
1635
+ "id": "Pc1myGweIgpo"
1636
+ },
1637
+ "source": [
1638
+ "## Instructor-Specified Evaluation\n",
1639
+ "Now, you can use the following code to create your settings. Change `output_setup` and `grading_instructions` as desired, making sure to keep the syntax (beginning and ending parentheses,and quotes at the beginning and end of each line) correct. `output_setup` has been copied from the previous cell, but you should fill in `grading_instructions`.\n",
1640
+ "\n",
1641
+ "### File Processing Options\n",
1642
+ "The `process_files` function has a number of settings.\n",
1643
+ "* The first setting must always be `all_json_dfs`, which contains the tabular representation of the json output.\n",
1644
+ "* The other settings should be set by name, and are:\n",
1645
+ " * **`output_desc`**: Shown as `output_setup` here, this contains the isntructions about how you want to the tabular representation to be set up. Note that you can also leave this off of the function list (just erase it and the following comma).\n",
1646
+ " * **`grad_instructions`**: Shown as `grading_instructions` here, use this variable to set grading instructions. Note that you can also leave this off of the function list (erase it and the following comma)\n",
1647
+ " * **`use_defaults`**: Some default grading and instruction prompts have already been created. If you set `use_defaults=TRUE`, both the grading instructions and the output table description will use the default prompts provided by the program, regardless of whether you have set values for `output_desc` or `grad_instructions`.\n",
1648
+ " * **`print_results`**: By default, the results will be printed for all students. However, if you don't want to see this output, you can set `print_results=False`.\n",
1649
+ "\n",
1650
+ "Again, make sure to observe the syntax. The defaults used in the program are shown in the above example."
1651
+ ]
1652
+ },
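+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For example, if you only want the built-in prompts and no per-student printout, a minimal call (a sketch using the variables already defined earlier in this notebook) would be:\n",
+ "```python\n",
+ "# Use the built-in table description and grading prompt, and suppress per-student printing\n",
+ "processed_submissions = process_files(all_json_dfs, use_defaults=True, print_results=False)\n",
+ "\n",
+ "# Write every evaluation to a single text log\n",
+ "output_log_file(all_json_dfs, processed_submissions, log_file='evaluation_log.txt')\n",
+ "```"
+ ]
+ },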
1653
+ {
1654
+ "cell_type": "code",
1655
+ "execution_count": 34,
1656
+ "metadata": {
1657
+ "colab": {
1658
+ "base_uri": "https://localhost:8080/",
1659
+ "height": 16
1660
+ },
1661
+ "id": "GiebKVlbYiG9",
1662
+ "outputId": "d4769d23-393c-4986-9418-5cef6944e6ab"
1663
+ },
1664
+ "outputs": [
1665
+ {
1666
+ "data": {
1667
+ "text/html": [
1668
+ "\n",
1669
+ " <style>\n",
1670
+ " pre {\n",
1671
+ " white-space: pre-wrap;\n",
1672
+ " }\n",
1673
+ " </style>\n",
1674
+ " "
1675
+ ],
1676
+ "text/plain": [
1677
+ "<IPython.core.display.HTML object>"
1678
+ ]
1679
+ },
1680
+ "metadata": {},
1681
+ "output_type": "display_data"
1682
+ }
1683
+ ],
1684
+ "source": [
1685
+ "output_setup = (\"For each student response given in the following chat log, please generate a summary and detailed feedback for each students' responses,\"\n",
1686
+ " \", including what the student did well, and what was done poorly. \")\n",
1687
+ "\n",
1688
+ "# add your own grading instructions\n",
1689
+ "grading_instructions = (\"INSERT ANY CUSTOM GRADING INSTRUCTIONS HERE\")\n",
1690
+ "\n",
1691
+ "# Assuming `file_paths` is a list of file paths.\n",
1692
+ "processed_submissions = process_files(all_json_dfs, output_setup, grading_instructions, use_defaults=False, print_results=True)\n",
1693
+ "\n",
1694
+ "output_log_file(all_json_dfs, processed_submissions)"
1695
+ ]
1696
+ },
1697
+ {
1698
+ "cell_type": "markdown",
1699
+ "metadata": {
1700
+ "id": "snLA6OZ83CrS"
1701
+ },
1702
+ "source": [
1703
+ "## Grading based on Blooms Taxonomy\n",
1704
+ "Another mechanism of evaluation is through Bloom's Taxonomy, where student responses will be evaluated based on where they fall on Bloom's Taxonomy. The higher the score with Bloom's Taxonomy, the more depth is illustrated by the question."
1705
+ ]
1706
+ },
1707
+ {
1708
+ "cell_type": "code",
1709
+ "execution_count": 140,
1710
+ "metadata": {
1711
+ "colab": {
1712
+ "base_uri": "https://localhost:8080/",
1713
+ "height": 625
1714
+ },
1715
+ "id": "HEPXCJdrYiG-",
1716
+ "outputId": "add813e2-6b7c-4772-dac5-5f756e893b8f"
1717
+ },
1718
+ "outputs": [
1719
+ {
1720
+ "data": {
1721
+ "text/html": [
1722
+ "\n",
1723
+ " <style>\n",
1724
+ " pre {\n",
1725
+ " white-space: pre-wrap;\n",
1726
+ " }\n",
1727
+ " </style>\n",
1728
+ " "
1729
+ ],
1730
+ "text/plain": [
1731
+ "<IPython.core.display.HTML object>"
1732
+ ]
1733
+ },
1734
+ "metadata": {},
1735
+ "output_type": "display_data"
1736
+ },
1737
+ {
1738
+ "data": {
1739
+ "text/html": [
1740
+ "\n",
1741
+ " <style>\n",
1742
+ " pre {\n",
1743
+ " white-space: pre-wrap;\n",
1744
+ " }\n",
1745
+ " </style>\n",
1746
+ " "
1747
+ ],
1748
+ "text/plain": [
1749
+ "<IPython.core.display.HTML object>"
1750
+ ]
1751
+ },
1752
+ "metadata": {},
1753
+ "output_type": "display_data"
1754
+ },
1755
+ {
1756
+ "data": {
1757
+ "text/html": [
1758
+ "\n",
1759
+ " <style>\n",
1760
+ " pre {\n",
1761
+ " white-space: pre-wrap;\n",
1762
+ " }\n",
1763
+ " </style>\n",
1764
+ " "
1765
+ ],
1766
+ "text/plain": [
1767
+ "<IPython.core.display.HTML object>"
1768
+ ]
1769
+ },
1770
+ "metadata": {},
1771
+ "output_type": "display_data"
1772
+ },
1773
+ {
1774
+ "data": {
1775
+ "text/html": [
1776
+ "\n",
1777
+ " <style>\n",
1778
+ " pre {\n",
1779
+ " white-space: pre-wrap;\n",
1780
+ " }\n",
1781
+ " </style>\n",
1782
+ " "
1783
+ ],
1784
+ "text/plain": [
1785
+ "<IPython.core.display.HTML object>"
1786
+ ]
1787
+ },
1788
+ "metadata": {},
1789
+ "output_type": "display_data"
1790
+ },
1791
+ {
1792
+ "data": {
1793
+ "text/html": [
1794
+ "\n",
1795
+ " <style>\n",
1796
+ " pre {\n",
1797
+ " white-space: pre-wrap;\n",
1798
+ " }\n",
1799
+ " </style>\n",
1800
+ " "
1801
+ ],
1802
+ "text/plain": [
1803
+ "<IPython.core.display.HTML object>"
1804
+ ]
1805
+ },
1806
+ "metadata": {},
1807
+ "output_type": "display_data"
1808
+ },
1809
+ {
1810
+ "name": "stdout",
1811
+ "output_type": "stream",
1812
+ "text": [
1813
+ "\n",
1814
+ "\n",
1815
+ "Result for file 0 instructorTest/bell_charreau.json \n",
1816
+ "0 instructorTest/spencer-smith_jesse.json\n",
1817
+ "Name: filename, dtype: object: \n",
1818
+ "Student 1:\n",
1819
+ "Summary: The student incorrectly states that capitalizing expenses is done to make the money look good on the earnings report.\n",
1820
+ "Feedback: The student's response is not accurate. They misunderstood the purpose of capitalizing expenses. The main purpose is to spread the cost of certain long-term assets over their useful life, not to make the money 'look good' on the earnings report. \n",
1821
+ "Overall Level of Engagement and Knowledge: 1 (Remember)\n",
1822
+ "\n",
1823
+ "Student 2:\n",
1824
+ "Summary: The student partially understands the purpose of capitalizing expenses, but their answer could be more comprehensive.\n",
1825
+ "Feedback: The student correctly notes that capitalized expenses provide benefits for a longer period and are different from regular expenses. However, their answer could be more comprehensive and provide a more thorough explanation of why certain expenses are capitalized and how they are treated differently.\n",
1826
+ "Overall Level of Engagement and Knowledge: 3 (Apply)\n",
1827
+ "\n",
1828
+ "Student 3:\n",
1829
+ "Summary: The student provides a comprehensive and accurate explanation of the purpose of capitalizing expenses.\n",
1830
+ "Feedback: The student's response is excellent. They provide a comprehensive explanation of why capitalizing expenses is crucial for incorporating costs into the estimate of corporate earnings. They correctly highlight the timing of costs and their related benefits, the impact on financial statements, and the alignment with the matching principle of accounting.\n",
1831
+ "Overall Level of Engagement and Knowledge: 6 (Create)\n",
1832
+ "\n",
1833
+ "Student 4:\n",
1834
+ "Summary: The student accurately states that expenses should be capitalized when they provide value beyond the current accounting period and provides relevant examples.\n",
1835
+ "Feedback: The student's response is great. They correctly identify the types of expenses that should be capitalized and provide relevant examples. They demonstrate a clear understanding of the concept and provide a well-explained answer.\n",
1836
+ "Overall Level of Engagement and Knowledge: 5 (Evaluate)\n"
1837
+ ]
1838
+ }
1839
+ ],
1840
+ "source": [
1841
+ "output_setup = (\"For each student response given in the following chat log, please generate a summary and detailed feedback for each students' responses,\"\n",
1842
+ " \", including what the student did well, and what was done poorly. \")\n",
1843
+ "grading_instructions = \"\"\"\\nEvaluate the each student's overall level or engagement and knowledge, based on bloom's taxonomy using their responses.\n",
1844
+ "Bloom's taxonomy is rated on a 1-6 point system, with 1 being remember (recall facts and basic concepts), 2 being understand (explain ideas or concepts),\n",
1845
+ "3 being apply (use information in new situations), 4 being analyze (draw connections among ideas), 5 being evaluate (justify a stand or decision),\n",
1846
+ "and 6 being create (produce new or original work). Assign the interaction a score from 1-6, where 1 = remember, 2 = understand, 3 = apply, 4 = analyze,\n",
1847
+ "5 = evaluate, and 6 = create.\"\"\"\n",
1848
+ "\n",
1849
+ "# Assuming `file_paths` is a list of file paths.\n",
1850
+ "processed_submissions = process_files(all_json_dfs, output_setup, grading_instructions, use_defaults=False, print_results=True)\n",
1851
+ "\n",
1852
+ "output_log_file(all_json_dfs, processed_submissions)"
1853
+ ]
1854
+ },
1855
+ {
1856
+ "cell_type": "markdown",
1857
+ "metadata": {
1858
+ "id": "FI5-vnUvXM03"
1859
+ },
1860
+ "source": [
1861
+ "# Returning Results\n"
1862
+ ]
1863
+ },
1864
+ {
1865
+ "cell_type": "markdown",
1866
+ "metadata": {
1867
+ "id": "LgoGt82CYiG-"
1868
+ },
1869
+ "source": [
1870
+ "**Extract Student Responses ONLY from CHAT JSON**\n",
1871
+ "\n",
1872
+ "Below are relevant user components of dataframes, including the conversion from the original json, the interaction labeled dataframe, and the output dataframe. Check to make sure they make sense."
1873
+ ]
1874
+ },
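+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If you want to eyeball a single submission before exporting everything, here is a quick sketch using the helpers defined earlier (the index 0 is just an example):\n",
+ "```python\n",
+ "# Preview the user-only messages from the first loaded submission\n",
+ "pretty_print(all_json_dfs[0][['timestamp', 'author', 'message', 'filename']])\n",
+ "```"
+ ]
+ },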
1875
+ {
1876
+ "cell_type": "code",
1877
+ "execution_count": null,
1878
+ "metadata": {
1879
+ "colab": {
1880
+ "base_uri": "https://localhost:8080/",
1881
+ "height": 16
1882
+ },
1883
+ "id": "HVq9i_mXYiG-",
1884
+ "outputId": "fb5251e9-a327-4dfa-e294-03f5c58f6d35"
1885
+ },
1886
+ "outputs": [
1887
+ {
1888
+ "data": {
1889
+ "text/html": [
1890
+ "\n",
1891
+ " <style>\n",
1892
+ " pre {\n",
1893
+ " white-space: pre-wrap;\n",
1894
+ " }\n",
1895
+ " </style>\n",
1896
+ " "
1897
+ ],
1898
+ "text/plain": [
1899
+ "<IPython.core.display.HTML object>"
1900
+ ]
1901
+ },
1902
+ "metadata": {},
1903
+ "output_type": "display_data"
1904
+ },
1905
+ {
1906
+ "data": {
1907
+ "text/html": [
1908
+ "\n",
1909
+ " <style>\n",
1910
+ " pre {\n",
1911
+ " white-space: pre-wrap;\n",
1912
+ " }\n",
1913
+ " </style>\n",
1914
+ " "
1915
+ ],
1916
+ "text/plain": [
1917
+ "<IPython.core.display.HTML object>"
1918
+ ]
1919
+ },
1920
+ "metadata": {},
1921
+ "output_type": "display_data"
1922
+ },
1923
+ {
1924
+ "data": {
1925
+ "text/html": [
1926
+ "\n",
1927
+ " <style>\n",
1928
+ " pre {\n",
1929
+ " white-space: pre-wrap;\n",
1930
+ " }\n",
1931
+ " </style>\n",
1932
+ " "
1933
+ ],
1934
+ "text/plain": [
1935
+ "<IPython.core.display.HTML object>"
1936
+ ]
1937
+ },
1938
+ "metadata": {},
1939
+ "output_type": "display_data"
1940
+ },
1941
+ {
1942
+ "data": {
1943
+ "text/html": [
1944
+ "\n",
1945
+ " <style>\n",
1946
+ " pre {\n",
1947
+ " white-space: pre-wrap;\n",
1948
+ " }\n",
1949
+ " </style>\n",
1950
+ " "
1951
+ ],
1952
+ "text/plain": [
1953
+ "<IPython.core.display.HTML object>"
1954
+ ]
1955
+ },
1956
+ "metadata": {},
1957
+ "output_type": "display_data"
1958
+ },
1959
+ {
1960
+ "data": {
1961
+ "text/html": [
1962
+ "\n",
1963
+ " <style>\n",
1964
+ " pre {\n",
1965
+ " white-space: pre-wrap;\n",
1966
+ " }\n",
1967
+ " </style>\n",
1968
+ " "
1969
+ ],
1970
+ "text/plain": [
1971
+ "<IPython.core.display.HTML object>"
1972
+ ]
1973
+ },
1974
+ "metadata": {},
1975
+ "output_type": "display_data"
1976
+ }
1977
+ ],
1978
+ "source": [
1979
+ "def write_responses_to_csv(json_dfs):\n",
1980
+ " # Concatenate all dataframes in json_dfs into one large dataframe\n",
1981
+ " df = pd.concat(json_dfs)\n",
1982
+ "\n",
1983
+ " # Write the dataframe to a CSV\n",
1984
+ " df.to_csv('all_student_responses.csv', index=False)\n",
1985
+ "\n",
1986
+ "write_responses_to_csv(all_json_dfs)"
1987
+ ]
1988
+ },
1989
+ {
1990
+ "cell_type": "markdown",
1991
+ "metadata": {
1992
+ "id": "1WIGxKmDYiG-"
1993
+ },
1994
+ "source": [
1995
+ "**Saving/Downloading AI-Assisted Student Evaluation from Chat JSON**\n",
1996
+ "\n",
1997
+ "Execute the following cell to have all of your students' data returned in a single CSV file."
1998
+ ]
1999
+ },
2000
+ {
2001
+ "cell_type": "code",
2002
+ "execution_count": null,
2003
+ "metadata": {
2004
+ "colab": {
2005
+ "base_uri": "https://localhost:8080/",
2006
+ "height": 16
2007
+ },
2008
+ "id": "QnWNEeqjYiG-",
2009
+ "outputId": "e98c0c39-8449-45d4-f93a-d9394f6781bf"
2010
+ },
2011
+ "outputs": [
2012
+ {
2013
+ "data": {
2014
+ "text/html": [
2015
+ "\n",
2016
+ " <style>\n",
2017
+ " pre {\n",
2018
+ " white-space: pre-wrap;\n",
2019
+ " }\n",
2020
+ " </style>\n",
2021
+ " "
2022
+ ],
2023
+ "text/plain": [
2024
+ "<IPython.core.display.HTML object>"
2025
+ ]
2026
+ },
2027
+ "metadata": {},
2028
+ "output_type": "display_data"
2029
+ },
2030
+ {
2031
+ "data": {
2032
+ "text/html": [
2033
+ "\n",
2034
+ " <style>\n",
2035
+ " pre {\n",
2036
+ " white-space: pre-wrap;\n",
2037
+ " }\n",
2038
+ " </style>\n",
2039
+ " "
2040
+ ],
2041
+ "text/plain": [
2042
+ "<IPython.core.display.HTML object>"
2043
+ ]
2044
+ },
2045
+ "metadata": {},
2046
+ "output_type": "display_data"
2047
+ },
2048
+ {
2049
+ "data": {
2050
+ "text/html": [
2051
+ "\n",
2052
+ " <style>\n",
2053
+ " pre {\n",
2054
+ " white-space: pre-wrap;\n",
2055
+ " }\n",
2056
+ " </style>\n",
2057
+ " "
2058
+ ],
2059
+ "text/plain": [
2060
+ "<IPython.core.display.HTML object>"
2061
+ ]
2062
+ },
2063
+ "metadata": {},
2064
+ "output_type": "display_data"
2065
+ },
2066
+ {
2067
+ "data": {
2068
+ "text/html": [
2069
+ "\n",
2070
+ " <style>\n",
2071
+ " pre {\n",
2072
+ " white-space: pre-wrap;\n",
2073
+ " }\n",
2074
+ " </style>\n",
2075
+ " "
2076
+ ],
2077
+ "text/plain": [
2078
+ "<IPython.core.display.HTML object>"
2079
+ ]
2080
+ },
2081
+ "metadata": {},
2082
+ "output_type": "display_data"
2083
+ },
2084
+ {
2085
+ "data": {
2086
+ "text/html": [
2087
+ "\n",
2088
+ " <style>\n",
2089
+ " pre {\n",
2090
+ " white-space: pre-wrap;\n",
2091
+ " }\n",
2092
+ " </style>\n",
2093
+ " "
2094
+ ],
2095
+ "text/plain": [
2096
+ "<IPython.core.display.HTML object>"
2097
+ ]
2098
+ },
2099
+ "metadata": {},
2100
+ "output_type": "display_data"
2101
+ }
2102
+ ],
2103
+ "source": [
2104
+ "# Start with an empty dataframe\n",
2105
+ "all_results_df = pd.DataFrame()\n",
2106
+ "\n",
2107
+ "for result in processed_submissions:\n",
2108
+ "\n",
2109
+ " # Append the data from the current file to the master dataframe\n",
2110
+ " all_results_df = pd.concat([all_results_df, df])\n",
2111
+ "\n",
2112
+ "# Now all_results_df contains data from all the files\n",
2113
+ "\n",
2114
+ "# Write all results to a single CSV\n",
2115
+ "all_results_df.to_csv('all_results.csv', index=False)"
2116
+ ]
2117
+ }
2118
+ ],
2119
+ "metadata": {
2120
+ "colab": {
2121
+ "include_colab_link": true,
2122
+ "provenance": []
2123
+ },
2124
+ "kernelspec": {
2125
+ "display_name": "Python 3",
2126
+ "name": "python3"
2127
+ },
2128
+ "language_info": {
2129
+ "name": "python",
2130
+ "version": "3.10.6"
2131
+ },
2132
+ "widgets": {
2133
+ "application/vnd.jupyter.widget-state+json": {
2134
+ "1072a8a142f64dfd96ee528a2e9d1595": {
2135
+ "model_module": "@jupyter-widgets/controls",
2136
+ "model_module_version": "1.5.0",
2137
+ "model_name": "LabelModel",
2138
+ "state": {
2139
+ "_dom_classes": [],
2140
+ "_model_module": "@jupyter-widgets/controls",
2141
+ "_model_module_version": "1.5.0",
2142
+ "_model_name": "LabelModel",
2143
+ "_view_count": null,
2144
+ "_view_module": "@jupyter-widgets/controls",
2145
+ "_view_module_version": "1.5.0",
2146
+ "_view_name": "LabelView",
2147
+ "description": "",
2148
+ "description_tooltip": null,
2149
+ "layout": "IPY_MODEL_abbecdc637694e7cb026e003244e7037",
2150
+ "placeholder": "​",
2151
+ "style": "IPY_MODEL_7f814595d31e4b86992b5bd6bc85ced4",
2152
+ "value": "Upload User Files:\t"
2153
+ }
2154
+ },
2155
+ "1b7ee0de15484cd5aecd6d8ca3b6ee9d": {
2156
+ "model_module": "@jupyter-widgets/controls",
2157
+ "model_module_version": "1.5.0",
2158
+ "model_name": "VBoxModel",
2159
+ "state": {
2160
+ "_dom_classes": [],
2161
+ "_model_module": "@jupyter-widgets/controls",
2162
+ "_model_module_version": "1.5.0",
2163
+ "_model_name": "VBoxModel",
2164
+ "_view_count": null,
2165
+ "_view_module": "@jupyter-widgets/controls",
2166
+ "_view_module_version": "1.5.0",
2167
+ "_view_name": "VBoxView",
2168
+ "box_style": "",
2169
+ "children": [
2170
+ "IPY_MODEL_b051a90758434644955747bc02d00bab",
2171
+ "IPY_MODEL_dde20647d3594d31b66b19659f53a95e",
2172
+ "IPY_MODEL_8610fffd2d2a4ec28f8c874c06073ce7",
2173
+ "IPY_MODEL_654ab6d155eb457ea5c719a9ac27ad5b",
2174
+ "IPY_MODEL_86cb4f568f454ff8832face502fb0745"
2175
+ ],
2176
+ "layout": "IPY_MODEL_e30fe87f01bc4580a61713b5b72439a2"
2177
+ }
2178
+ },
2179
+ "252b8009f3734ed2908049ebb40c0247": {
2180
+ "model_module": "@jupyter-widgets/controls",
2181
+ "model_module_version": "1.5.0",
2182
+ "model_name": "TextareaModel",
2183
+ "state": {
2184
+ "_dom_classes": [],
2185
+ "_model_module": "@jupyter-widgets/controls",
2186
+ "_model_module_version": "1.5.0",
2187
+ "_model_name": "TextareaModel",
2188
+ "_view_count": null,
2189
+ "_view_module": "@jupyter-widgets/controls",
2190
+ "_view_module_version": "1.5.0",
2191
+ "_view_name": "TextareaView",
2192
+ "continuous_update": true,
2193
+ "description": "Learning Objectives",
2194
+ "description_tooltip": null,
2195
+ "disabled": false,
2196
+ "layout": "IPY_MODEL_b74cf92175374028948d4cf529d4d1e6",
2197
+ "placeholder": "Learning objectives: 1. Understand and implement classes in object-oriented programming",
2198
+ "rows": null,
2199
+ "style": "IPY_MODEL_f7d75b0a32554a9589c513336fc30095",
2200
+ "value": ""
2201
+ }
2202
+ },
2203
+ "26d13984d45745858d3b890bc7f18a90": {
2204
+ "model_module": "@jupyter-widgets/controls",
2205
+ "model_module_version": "1.5.0",
2206
+ "model_name": "ButtonStyleModel",
2207
+ "state": {
2208
+ "_model_module": "@jupyter-widgets/controls",
2209
+ "_model_module_version": "1.5.0",
2210
+ "_model_name": "ButtonStyleModel",
2211
+ "_view_count": null,
2212
+ "_view_module": "@jupyter-widgets/base",
2213
+ "_view_module_version": "1.2.0",
2214
+ "_view_name": "StyleView",
2215
+ "button_color": null,
2216
+ "font_weight": ""
2217
+ }
2218
+ },
2219
+ "453b12da4b6540cd9e4e57f73a4d670c": {
2220
+ "model_module": "@jupyter-widgets/controls",
2221
+ "model_module_version": "1.5.0",
2222
+ "model_name": "DescriptionStyleModel",
2223
+ "state": {
2224
+ "_model_module": "@jupyter-widgets/controls",
2225
+ "_model_module_version": "1.5.0",
2226
+ "_model_name": "DescriptionStyleModel",
2227
+ "_view_count": null,
2228
+ "_view_module": "@jupyter-widgets/base",
2229
+ "_view_module_version": "1.2.0",
2230
+ "_view_name": "StyleView",
2231
+ "description_width": ""
2232
+ }
2233
+ },
2234
+ "49f80567705147f0b82d45b7f06dd1ba": {
2235
+ "model_module": "@jupyter-widgets/controls",
2236
+ "model_module_version": "1.5.0",
2237
+ "model_name": "FileUploadModel",
2238
+ "state": {
2239
+ "_counter": 1,
2240
+ "_dom_classes": [],
2241
+ "_model_module": "@jupyter-widgets/controls",
2242
+ "_model_module_version": "1.5.0",
2243
+ "_model_name": "FileUploadModel",
2244
+ "_view_count": null,
2245
+ "_view_module": "@jupyter-widgets/controls",
2246
+ "_view_module_version": "1.5.0",
2247
+ "_view_name": "FileUploadView",
2248
+ "accept": ".zip",
2249
+ "button_style": "",
2250
+ "data": [
2251
+ null
2252
+ ],
2253
+ "description": "Upload",
2254
+ "description_tooltip": null,
2255
+ "disabled": false,
2256
+ "error": "",
2257
+ "icon": "upload",
2258
+ "layout": "IPY_MODEL_dfa8d6c7d70b42468cbda035de89404c",
2259
+ "metadata": [
2260
+ {
2261
+ "lastModified": 1689919477171,
2262
+ "name": "instructorTest.zip",
2263
+ "size": 4958,
2264
+ "type": "application/zip"
2265
+ }
2266
+ ],
2267
+ "multiple": false,
2268
+ "style": "IPY_MODEL_26d13984d45745858d3b890bc7f18a90"
2269
+ }
2270
+ },
2271
+ "53722998fbe64a7c94829b79e8cd69d6": {
2272
+ "model_module": "@jupyter-widgets/base",
2273
+ "model_module_version": "1.2.0",
2274
+ "model_name": "LayoutModel",
2275
+ "state": {
2276
+ "_model_module": "@jupyter-widgets/base",
2277
+ "_model_module_version": "1.2.0",
2278
+ "_model_name": "LayoutModel",
2279
+ "_view_count": null,
2280
+ "_view_module": "@jupyter-widgets/base",
2281
+ "_view_module_version": "1.2.0",
2282
+ "_view_name": "LayoutView",
2283
+ "align_content": null,
2284
+ "align_items": null,
2285
+ "align_self": null,
2286
+ "border": null,
2287
+ "bottom": null,
2288
+ "display": null,
2289
+ "flex": null,
2290
+ "flex_flow": null,
2291
+ "grid_area": null,
2292
+ "grid_auto_columns": null,
2293
+ "grid_auto_flow": null,
2294
+ "grid_auto_rows": null,
2295
+ "grid_column": null,
2296
+ "grid_gap": null,
2297
+ "grid_row": null,
2298
+ "grid_template_areas": null,
2299
+ "grid_template_columns": null,
2300
+ "grid_template_rows": null,
2301
+ "height": null,
2302
+ "justify_content": null,
2303
+ "justify_items": null,
2304
+ "left": null,
2305
+ "margin": null,
2306
+ "max_height": null,
2307
+ "max_width": null,
2308
+ "min_height": null,
2309
+ "min_width": null,
2310
+ "object_fit": null,
2311
+ "object_position": null,
2312
+ "order": null,
2313
+ "overflow": null,
2314
+ "overflow_x": null,
2315
+ "overflow_y": null,
2316
+ "padding": null,
2317
+ "right": null,
2318
+ "top": null,
2319
+ "visibility": null,
2320
+ "width": null
2321
+ }
2322
+ },
2323
+ "54e3918921f44fb4a9020beab951fcdf": {
2324
+ "model_module": "@jupyter-widgets/controls",
2325
+ "model_module_version": "1.5.0",
2326
+ "model_name": "DescriptionStyleModel",
2327
+ "state": {
2328
+ "_model_module": "@jupyter-widgets/controls",
2329
+ "_model_module_version": "1.5.0",
2330
+ "_model_name": "DescriptionStyleModel",
2331
+ "_view_count": null,
2332
+ "_view_module": "@jupyter-widgets/base",
2333
+ "_view_module_version": "1.2.0",
2334
+ "_view_name": "StyleView",
2335
+ "description_width": "initial"
2336
+ }
2337
+ },
2338
+ "5a17f4509d194105b23dd616e45183d5": {
2339
+ "model_module": "@jupyter-widgets/base",
2340
+ "model_module_version": "1.2.0",
2341
+ "model_name": "LayoutModel",
2342
+ "state": {
2343
+ "_model_module": "@jupyter-widgets/base",
2344
+ "_model_module_version": "1.2.0",
2345
+ "_model_name": "LayoutModel",
2346
+ "_view_count": null,
2347
+ "_view_module": "@jupyter-widgets/base",
2348
+ "_view_module_version": "1.2.0",
2349
+ "_view_name": "LayoutView",
2350
+ "align_content": null,
2351
+ "align_items": null,
2352
+ "align_self": null,
2353
+ "border": null,
2354
+ "bottom": null,
2355
+ "display": null,
2356
+ "flex": null,
2357
+ "flex_flow": null,
2358
+ "grid_area": null,
2359
+ "grid_auto_columns": null,
2360
+ "grid_auto_flow": null,
2361
+ "grid_auto_rows": null,
2362
+ "grid_column": null,
2363
+ "grid_gap": null,
2364
+ "grid_row": null,
2365
+ "grid_template_areas": null,
2366
+ "grid_template_columns": null,
2367
+ "grid_template_rows": null,
2368
+ "height": null,
2369
+ "justify_content": null,
2370
+ "justify_items": null,
2371
+ "left": null,
2372
+ "margin": null,
2373
+ "max_height": null,
2374
+ "max_width": null,
2375
+ "min_height": null,
2376
+ "min_width": null,
2377
+ "object_fit": null,
2378
+ "object_position": null,
2379
+ "order": null,
2380
+ "overflow": null,
2381
+ "overflow_x": null,
2382
+ "overflow_y": null,
2383
+ "padding": null,
2384
+ "right": null,
2385
+ "top": null,
2386
+ "visibility": null,
2387
+ "width": null
2388
+ }
2389
+ },
2390
+ "60b80d550efa403a825a3cb913c26f53": {
2391
+ "model_module": "@jupyter-widgets/base",
2392
+ "model_module_version": "1.2.0",
2393
+ "model_name": "LayoutModel",
2394
+ "state": {
2395
+ "_model_module": "@jupyter-widgets/base",
2396
+ "_model_module_version": "1.2.0",
2397
+ "_model_name": "LayoutModel",
2398
+ "_view_count": null,
2399
+ "_view_module": "@jupyter-widgets/base",
2400
+ "_view_module_version": "1.2.0",
2401
+ "_view_name": "LayoutView",
2402
+ "align_content": null,
2403
+ "align_items": null,
2404
+ "align_self": null,
2405
+ "border": null,
2406
+ "bottom": null,
2407
+ "display": null,
2408
+ "flex": null,
2409
+ "flex_flow": null,
2410
+ "grid_area": null,
2411
+ "grid_auto_columns": null,
2412
+ "grid_auto_flow": null,
2413
+ "grid_auto_rows": null,
2414
+ "grid_column": null,
2415
+ "grid_gap": null,
2416
+ "grid_row": null,
2417
+ "grid_template_areas": null,
2418
+ "grid_template_columns": null,
2419
+ "grid_template_rows": null,
2420
+ "height": null,
2421
+ "justify_content": null,
2422
+ "justify_items": null,
2423
+ "left": null,
2424
+ "margin": null,
2425
+ "max_height": null,
2426
+ "max_width": null,
2427
+ "min_height": null,
2428
+ "min_width": null,
2429
+ "object_fit": null,
2430
+ "object_position": null,
2431
+ "order": null,
2432
+ "overflow": null,
2433
+ "overflow_x": null,
2434
+ "overflow_y": null,
2435
+ "padding": null,
2436
+ "right": null,
2437
+ "top": null,
2438
+ "visibility": null,
2439
+ "width": null
2440
+ }
2441
+ },
2442
+ "654ab6d155eb457ea5c719a9ac27ad5b": {
2443
+ "model_module": "@jupyter-widgets/controls",
2444
+ "model_module_version": "1.5.0",
2445
+ "model_name": "ButtonModel",
2446
+ "state": {
2447
+ "_dom_classes": [],
2448
+ "_model_module": "@jupyter-widgets/controls",
2449
+ "_model_module_version": "1.5.0",
2450
+ "_model_name": "ButtonModel",
2451
+ "_view_count": null,
2452
+ "_view_module": "@jupyter-widgets/controls",
2453
+ "_view_module_version": "1.5.0",
2454
+ "_view_name": "ButtonView",
2455
+ "button_style": "success",
2456
+ "description": "Submit",
2457
+ "disabled": false,
2458
+ "icon": "check",
2459
+ "layout": "IPY_MODEL_81c4dda35a7d4e15821bb4bc0973354e",
2460
+ "style": "IPY_MODEL_df1c46361f714aceb9c046f98fede40c",
2461
+ "tooltip": ""
2462
+ }
2463
+ },
2464
+ "6622f76f91f44527a87a7575bbd388d2": {
2465
+ "model_module": "@jupyter-widgets/controls",
2466
+ "model_module_version": "1.5.0",
2467
+ "model_name": "HBoxModel",
2468
+ "state": {
2469
+ "_dom_classes": [],
2470
+ "_model_module": "@jupyter-widgets/controls",
2471
+ "_model_module_version": "1.5.0",
2472
+ "_model_name": "HBoxModel",
2473
+ "_view_count": null,
2474
+ "_view_module": "@jupyter-widgets/controls",
2475
+ "_view_module_version": "1.5.0",
2476
+ "_view_name": "HBoxView",
2477
+ "box_style": "",
2478
+ "children": [
2479
+ "IPY_MODEL_7f7164e80a464ba9b99f96c10132db25",
2480
+ "IPY_MODEL_49f80567705147f0b82d45b7f06dd1ba"
2481
+ ],
2482
+ "layout": "IPY_MODEL_5a17f4509d194105b23dd616e45183d5"
2483
+ }
2484
+ },
2485
+ "67b4083cd4234f52bb7cca27ab9cddb3": {
2486
+ "model_module": "@jupyter-widgets/controls",
2487
+ "model_module_version": "1.5.0",
2488
+ "model_name": "FileUploadModel",
2489
+ "state": {
2490
+ "_counter": 0,
2491
+ "_dom_classes": [],
2492
+ "_model_module": "@jupyter-widgets/controls",
2493
+ "_model_module_version": "1.5.0",
2494
+ "_model_name": "FileUploadModel",
2495
+ "_view_count": null,
2496
+ "_view_module": "@jupyter-widgets/controls",
2497
+ "_view_module_version": "1.5.0",
2498
+ "_view_name": "FileUploadView",
2499
+ "accept": ".zip",
2500
+ "button_style": "",
2501
+ "data": [],
2502
+ "description": "Upload",
2503
+ "description_tooltip": null,
2504
+ "disabled": false,
2505
+ "error": "",
2506
+ "icon": "upload",
2507
+ "layout": "IPY_MODEL_76548751bb9c4bcb9d4f39788ea7d4af",
2508
+ "metadata": [],
2509
+ "multiple": false,
2510
+ "style": "IPY_MODEL_dbb88901f5084d49af208b91b52b6073"
2511
+ }
2512
+ },
2513
+ "76548751bb9c4bcb9d4f39788ea7d4af": {
2514
+ "model_module": "@jupyter-widgets/base",
2515
+ "model_module_version": "1.2.0",
2516
+ "model_name": "LayoutModel",
2517
+ "state": {
2518
+ "_model_module": "@jupyter-widgets/base",
2519
+ "_model_module_version": "1.2.0",
2520
+ "_model_name": "LayoutModel",
2521
+ "_view_count": null,
2522
+ "_view_module": "@jupyter-widgets/base",
2523
+ "_view_module_version": "1.2.0",
2524
+ "_view_name": "LayoutView",
2525
+ "align_content": null,
2526
+ "align_items": null,
2527
+ "align_self": null,
2528
+ "border": null,
2529
+ "bottom": null,
2530
+ "display": null,
2531
+ "flex": null,
2532
+ "flex_flow": null,
2533
+ "grid_area": null,
2534
+ "grid_auto_columns": null,
2535
+ "grid_auto_flow": null,
2536
+ "grid_auto_rows": null,
2537
+ "grid_column": null,
2538
+ "grid_gap": null,
2539
+ "grid_row": null,
2540
+ "grid_template_areas": null,
2541
+ "grid_template_columns": null,
2542
+ "grid_template_rows": null,
2543
+ "height": null,
2544
+ "justify_content": null,
2545
+ "justify_items": null,
2546
+ "left": null,
2547
+ "margin": null,
2548
+ "max_height": null,
2549
+ "max_width": null,
2550
+ "min_height": null,
2551
+ "min_width": null,
2552
+ "object_fit": null,
2553
+ "object_position": null,
2554
+ "order": null,
2555
+ "overflow": null,
2556
+ "overflow_x": null,
2557
+ "overflow_y": null,
2558
+ "padding": null,
2559
+ "right": null,
2560
+ "top": null,
2561
+ "visibility": null,
2562
+ "width": null
2563
+ }
2564
+ },
2565
+ "7f7164e80a464ba9b99f96c10132db25": {
2566
+ "model_module": "@jupyter-widgets/controls",
2567
+ "model_module_version": "1.5.0",
2568
+ "model_name": "LabelModel",
2569
+ "state": {
2570
+ "_dom_classes": [],
2571
+ "_model_module": "@jupyter-widgets/controls",
2572
+ "_model_module_version": "1.5.0",
2573
+ "_model_name": "LabelModel",
2574
+ "_view_count": null,
2575
+ "_view_module": "@jupyter-widgets/controls",
2576
+ "_view_module_version": "1.5.0",
2577
+ "_view_name": "LabelView",
2578
+ "description": "",
2579
+ "description_tooltip": null,
2580
+ "layout": "IPY_MODEL_60b80d550efa403a825a3cb913c26f53",
2581
+ "placeholder": "​",
2582
+ "style": "IPY_MODEL_d0bd0e3f12594ff1a51365b65a3fcc43",
2583
+ "value": "Upload User Files:\t"
2584
+ }
2585
+ },
2586
+ "7f814595d31e4b86992b5bd6bc85ced4": {
2587
+ "model_module": "@jupyter-widgets/controls",
2588
+ "model_module_version": "1.5.0",
2589
+ "model_name": "DescriptionStyleModel",
2590
+ "state": {
2591
+ "_model_module": "@jupyter-widgets/controls",
2592
+ "_model_module_version": "1.5.0",
2593
+ "_model_name": "DescriptionStyleModel",
2594
+ "_view_count": null,
2595
+ "_view_module": "@jupyter-widgets/base",
2596
+ "_view_module_version": "1.2.0",
2597
+ "_view_name": "StyleView",
2598
+ "description_width": ""
2599
+ }
2600
+ },
2601
+ "81c4dda35a7d4e15821bb4bc0973354e": {
2602
+ "model_module": "@jupyter-widgets/base",
2603
+ "model_module_version": "1.2.0",
2604
+ "model_name": "LayoutModel",
2605
+ "state": {
2606
+ "_model_module": "@jupyter-widgets/base",
2607
+ "_model_module_version": "1.2.0",
2608
+ "_model_name": "LayoutModel",
2609
+ "_view_count": null,
2610
+ "_view_module": "@jupyter-widgets/base",
2611
+ "_view_module_version": "1.2.0",
2612
+ "_view_name": "LayoutView",
2613
+ "align_content": null,
2614
+ "align_items": null,
2615
+ "align_self": null,
2616
+ "border": null,
2617
+ "bottom": null,
2618
+ "display": null,
2619
+ "flex": null,
2620
+ "flex_flow": null,
2621
+ "grid_area": null,
2622
+ "grid_auto_columns": null,
2623
+ "grid_auto_flow": null,
2624
+ "grid_auto_rows": null,
2625
+ "grid_column": null,
2626
+ "grid_gap": null,
2627
+ "grid_row": null,
2628
+ "grid_template_areas": null,
2629
+ "grid_template_columns": null,
2630
+ "grid_template_rows": null,
2631
+ "height": null,
2632
+ "justify_content": null,
2633
+ "justify_items": null,
2634
+ "left": null,
2635
+ "margin": null,
2636
+ "max_height": null,
2637
+ "max_width": null,
2638
+ "min_height": null,
2639
+ "min_width": null,
2640
+ "object_fit": null,
2641
+ "object_position": null,
2642
+ "order": null,
2643
+ "overflow": null,
2644
+ "overflow_x": null,
2645
+ "overflow_y": null,
2646
+ "padding": null,
2647
+ "right": null,
2648
+ "top": null,
2649
+ "visibility": null,
2650
+ "width": null
2651
+ }
2652
+ },
2653
+ "8610fffd2d2a4ec28f8c874c06073ce7": {
2654
+ "model_module": "@jupyter-widgets/controls",
2655
+ "model_module_version": "1.5.0",
2656
+ "model_name": "HBoxModel",
2657
+ "state": {
2658
+ "_dom_classes": [],
2659
+ "_model_module": "@jupyter-widgets/controls",
2660
+ "_model_module_version": "1.5.0",
2661
+ "_model_name": "HBoxModel",
2662
+ "_view_count": null,
2663
+ "_view_module": "@jupyter-widgets/controls",
2664
+ "_view_module_version": "1.5.0",
2665
+ "_view_name": "HBoxView",
2666
+ "box_style": "",
2667
+ "children": [
2668
+ "IPY_MODEL_1072a8a142f64dfd96ee528a2e9d1595",
2669
+ "IPY_MODEL_67b4083cd4234f52bb7cca27ab9cddb3"
2670
+ ],
2671
+ "layout": "IPY_MODEL_d0a1ebdf7fc0473f91c39b29ca580934"
2672
+ }
2673
+ },
2674
+ "86cb4f568f454ff8832face502fb0745": {
2675
+ "model_module": "@jupyter-widgets/output",
2676
+ "model_module_version": "1.0.0",
2677
+ "model_name": "OutputModel",
2678
+ "state": {
2679
+ "_dom_classes": [],
2680
+ "_model_module": "@jupyter-widgets/output",
2681
+ "_model_module_version": "1.0.0",
2682
+ "_model_name": "OutputModel",
2683
+ "_view_count": null,
2684
+ "_view_module": "@jupyter-widgets/output",
2685
+ "_view_module_version": "1.0.0",
2686
+ "_view_name": "OutputView",
2687
+ "layout": "IPY_MODEL_53722998fbe64a7c94829b79e8cd69d6",
2688
+ "msg_id": "",
2689
+ "outputs": [
2690
+ {
2691
+ "name": "stdout",
2692
+ "output_type": "stream",
2693
+ "text": [
2694
+ "Extracted files and directories: instructorTest/, __MACOSX/._instructorTest, instructorTest/bell_charreau.json, __MACOSX/instructorTest/._bell_charreau.json, instructorTest/spencer-smith_jesse.json, __MACOSX/instructorTest/._spencer-smith_jesse.json\n",
2695
+ "\n",
2696
+ "Loading successful!\n",
2697
+ "Learning Objectives: \n",
2698
+ "Extracted JSON files: instructorTest/spencer-smith_jesse.json, instructorTest/bell_charreau.json\n",
2699
+ "Submitted and Reset all values.\n"
2700
+ ]
2701
+ }
2702
+ ]
2703
+ }
2704
+ },
2705
+ "a84d31fb8f4e4bafb74035158834b404": {
2706
+ "model_module": "@jupyter-widgets/controls",
2707
+ "model_module_version": "1.5.0",
2708
+ "model_name": "VBoxModel",
2709
+ "state": {
2710
+ "_dom_classes": [],
2711
+ "_model_module": "@jupyter-widgets/controls",
2712
+ "_model_module_version": "1.5.0",
2713
+ "_model_name": "VBoxModel",
2714
+ "_view_count": null,
2715
+ "_view_module": "@jupyter-widgets/controls",
2716
+ "_view_module_version": "1.5.0",
2717
+ "_view_name": "VBoxView",
2718
+ "box_style": "",
2719
+ "children": [
2720
+ "IPY_MODEL_b051a90758434644955747bc02d00bab",
2721
+ "IPY_MODEL_252b8009f3734ed2908049ebb40c0247",
2722
+ "IPY_MODEL_6622f76f91f44527a87a7575bbd388d2",
2723
+ "IPY_MODEL_654ab6d155eb457ea5c719a9ac27ad5b",
2724
+ "IPY_MODEL_86cb4f568f454ff8832face502fb0745"
2725
+ ],
2726
+ "layout": "IPY_MODEL_e30fe87f01bc4580a61713b5b72439a2"
2727
+ }
2728
+ },
2729
+ "abbecdc637694e7cb026e003244e7037": {
2730
+ "model_module": "@jupyter-widgets/base",
2731
+ "model_module_version": "1.2.0",
2732
+ "model_name": "LayoutModel",
2733
+ "state": {
2734
+ "_model_module": "@jupyter-widgets/base",
2735
+ "_model_module_version": "1.2.0",
2736
+ "_model_name": "LayoutModel",
2737
+ "_view_count": null,
2738
+ "_view_module": "@jupyter-widgets/base",
2739
+ "_view_module_version": "1.2.0",
2740
+ "_view_name": "LayoutView",
2741
+ "align_content": null,
2742
+ "align_items": null,
2743
+ "align_self": null,
2744
+ "border": null,
2745
+ "bottom": null,
2746
+ "display": null,
2747
+ "flex": null,
2748
+ "flex_flow": null,
2749
+ "grid_area": null,
2750
+ "grid_auto_columns": null,
2751
+ "grid_auto_flow": null,
2752
+ "grid_auto_rows": null,
2753
+ "grid_column": null,
2754
+ "grid_gap": null,
2755
+ "grid_row": null,
2756
+ "grid_template_areas": null,
2757
+ "grid_template_columns": null,
2758
+ "grid_template_rows": null,
2759
+ "height": null,
2760
+ "justify_content": null,
2761
+ "justify_items": null,
2762
+ "left": null,
2763
+ "margin": null,
2764
+ "max_height": null,
2765
+ "max_width": null,
2766
+ "min_height": null,
2767
+ "min_width": null,
2768
+ "object_fit": null,
2769
+ "object_position": null,
2770
+ "order": null,
2771
+ "overflow": null,
2772
+ "overflow_x": null,
2773
+ "overflow_y": null,
2774
+ "padding": null,
2775
+ "right": null,
2776
+ "top": null,
2777
+ "visibility": null,
2778
+ "width": null
2779
+ }
2780
+ },
2781
+ "b051a90758434644955747bc02d00bab": {
2782
+ "model_module": "@jupyter-widgets/controls",
2783
+ "model_module_version": "1.5.0",
2784
+ "model_name": "HTMLModel",
2785
+ "state": {
2786
+ "_dom_classes": [],
2787
+ "_model_module": "@jupyter-widgets/controls",
2788
+ "_model_module_version": "1.5.0",
2789
+ "_model_name": "HTMLModel",
2790
+ "_view_count": null,
2791
+ "_view_module": "@jupyter-widgets/controls",
2792
+ "_view_module_version": "1.5.0",
2793
+ "_view_name": "HTMLView",
2794
+ "description": "",
2795
+ "description_tooltip": null,
2796
+ "layout": "IPY_MODEL_d16b25c7e9e948938c9303fbe8ae3dcc",
2797
+ "placeholder": "​",
2798
+ "style": "IPY_MODEL_453b12da4b6540cd9e4e57f73a4d670c",
2799
+ "value": "<h2>Instructor Grading Configuration</h2>"
2800
+ }
2801
+ },
2802
+ "b74cf92175374028948d4cf529d4d1e6": {
2803
+ "model_module": "@jupyter-widgets/base",
2804
+ "model_module_version": "1.2.0",
2805
+ "model_name": "LayoutModel",
2806
+ "state": {
2807
+ "_model_module": "@jupyter-widgets/base",
2808
+ "_model_module_version": "1.2.0",
2809
+ "_model_name": "LayoutModel",
2810
+ "_view_count": null,
2811
+ "_view_module": "@jupyter-widgets/base",
2812
+ "_view_module_version": "1.2.0",
2813
+ "_view_name": "LayoutView",
2814
+ "align_content": null,
2815
+ "align_items": null,
2816
+ "align_self": null,
2817
+ "border": null,
2818
+ "bottom": null,
2819
+ "display": null,
2820
+ "flex": null,
2821
+ "flex_flow": null,
2822
+ "grid_area": null,
2823
+ "grid_auto_columns": null,
2824
+ "grid_auto_flow": null,
2825
+ "grid_auto_rows": null,
2826
+ "grid_column": null,
2827
+ "grid_gap": null,
2828
+ "grid_row": null,
2829
+ "grid_template_areas": null,
2830
+ "grid_template_columns": null,
2831
+ "grid_template_rows": null,
2832
+ "height": null,
2833
+ "justify_content": null,
2834
+ "justify_items": null,
2835
+ "left": null,
2836
+ "margin": null,
2837
+ "max_height": null,
2838
+ "max_width": null,
2839
+ "min_height": null,
2840
+ "min_width": null,
2841
+ "object_fit": null,
2842
+ "object_position": null,
2843
+ "order": null,
2844
+ "overflow": null,
2845
+ "overflow_x": null,
2846
+ "overflow_y": null,
2847
+ "padding": null,
2848
+ "right": null,
2849
+ "top": null,
2850
+ "visibility": null,
2851
+ "width": "auto"
2852
+ }
2853
+ },
2854
+ "d0a1ebdf7fc0473f91c39b29ca580934": {
2855
+ "model_module": "@jupyter-widgets/base",
2856
+ "model_module_version": "1.2.0",
2857
+ "model_name": "LayoutModel",
2858
+ "state": {
2859
+ "_model_module": "@jupyter-widgets/base",
2860
+ "_model_module_version": "1.2.0",
2861
+ "_model_name": "LayoutModel",
2862
+ "_view_count": null,
2863
+ "_view_module": "@jupyter-widgets/base",
2864
+ "_view_module_version": "1.2.0",
2865
+ "_view_name": "LayoutView",
2866
+ "align_content": null,
2867
+ "align_items": null,
2868
+ "align_self": null,
2869
+ "border": null,
2870
+ "bottom": null,
2871
+ "display": null,
2872
+ "flex": null,
2873
+ "flex_flow": null,
2874
+ "grid_area": null,
2875
+ "grid_auto_columns": null,
2876
+ "grid_auto_flow": null,
2877
+ "grid_auto_rows": null,
2878
+ "grid_column": null,
2879
+ "grid_gap": null,
2880
+ "grid_row": null,
2881
+ "grid_template_areas": null,
2882
+ "grid_template_columns": null,
2883
+ "grid_template_rows": null,
2884
+ "height": null,
2885
+ "justify_content": null,
2886
+ "justify_items": null,
2887
+ "left": null,
2888
+ "margin": null,
2889
+ "max_height": null,
2890
+ "max_width": null,
2891
+ "min_height": null,
2892
+ "min_width": null,
2893
+ "object_fit": null,
2894
+ "object_position": null,
2895
+ "order": null,
2896
+ "overflow": null,
2897
+ "overflow_x": null,
2898
+ "overflow_y": null,
2899
+ "padding": null,
2900
+ "right": null,
2901
+ "top": null,
2902
+ "visibility": null,
2903
+ "width": null
2904
+ }
2905
+ },
2906
+ "d0bd0e3f12594ff1a51365b65a3fcc43": {
2907
+ "model_module": "@jupyter-widgets/controls",
2908
+ "model_module_version": "1.5.0",
2909
+ "model_name": "DescriptionStyleModel",
2910
+ "state": {
2911
+ "_model_module": "@jupyter-widgets/controls",
2912
+ "_model_module_version": "1.5.0",
2913
+ "_model_name": "DescriptionStyleModel",
2914
+ "_view_count": null,
2915
+ "_view_module": "@jupyter-widgets/base",
2916
+ "_view_module_version": "1.2.0",
2917
+ "_view_name": "StyleView",
2918
+ "description_width": ""
2919
+ }
2920
+ },
2921
+ "d16b25c7e9e948938c9303fbe8ae3dcc": {
2922
+ "model_module": "@jupyter-widgets/base",
2923
+ "model_module_version": "1.2.0",
2924
+ "model_name": "LayoutModel",
2925
+ "state": {
2926
+ "_model_module": "@jupyter-widgets/base",
2927
+ "_model_module_version": "1.2.0",
2928
+ "_model_name": "LayoutModel",
2929
+ "_view_count": null,
2930
+ "_view_module": "@jupyter-widgets/base",
2931
+ "_view_module_version": "1.2.0",
2932
+ "_view_name": "LayoutView",
2933
+ "align_content": null,
2934
+ "align_items": null,
2935
+ "align_self": null,
2936
+ "border": null,
2937
+ "bottom": null,
2938
+ "display": null,
2939
+ "flex": null,
2940
+ "flex_flow": null,
2941
+ "grid_area": null,
2942
+ "grid_auto_columns": null,
2943
+ "grid_auto_flow": null,
2944
+ "grid_auto_rows": null,
2945
+ "grid_column": null,
2946
+ "grid_gap": null,
2947
+ "grid_row": null,
2948
+ "grid_template_areas": null,
2949
+ "grid_template_columns": null,
2950
+ "grid_template_rows": null,
2951
+ "height": null,
2952
+ "justify_content": null,
2953
+ "justify_items": null,
2954
+ "left": null,
2955
+ "margin": null,
2956
+ "max_height": null,
2957
+ "max_width": null,
2958
+ "min_height": null,
2959
+ "min_width": null,
2960
+ "object_fit": null,
2961
+ "object_position": null,
2962
+ "order": null,
2963
+ "overflow": null,
2964
+ "overflow_x": null,
2965
+ "overflow_y": null,
2966
+ "padding": null,
2967
+ "right": null,
2968
+ "top": null,
2969
+ "visibility": null,
2970
+ "width": null
2971
+ }
2972
+ },
2973
+ "dbb88901f5084d49af208b91b52b6073": {
2974
+ "model_module": "@jupyter-widgets/controls",
2975
+ "model_module_version": "1.5.0",
2976
+ "model_name": "ButtonStyleModel",
2977
+ "state": {
2978
+ "_model_module": "@jupyter-widgets/controls",
2979
+ "_model_module_version": "1.5.0",
2980
+ "_model_name": "ButtonStyleModel",
2981
+ "_view_count": null,
2982
+ "_view_module": "@jupyter-widgets/base",
2983
+ "_view_module_version": "1.2.0",
2984
+ "_view_name": "StyleView",
2985
+ "button_color": null,
2986
+ "font_weight": ""
2987
+ }
2988
+ },
2989
+ "dde20647d3594d31b66b19659f53a95e": {
2990
+ "model_module": "@jupyter-widgets/controls",
2991
+ "model_module_version": "1.5.0",
2992
+ "model_name": "TextareaModel",
2993
+ "state": {
2994
+ "_dom_classes": [],
2995
+ "_model_module": "@jupyter-widgets/controls",
2996
+ "_model_module_version": "1.5.0",
2997
+ "_model_name": "TextareaModel",
2998
+ "_view_count": null,
2999
+ "_view_module": "@jupyter-widgets/controls",
3000
+ "_view_module_version": "1.5.0",
3001
+ "_view_name": "TextareaView",
3002
+ "continuous_update": true,
3003
+ "description": "Learning Objectives",
3004
+ "description_tooltip": null,
3005
+ "disabled": false,
3006
+ "layout": "IPY_MODEL_b74cf92175374028948d4cf529d4d1e6",
3007
+ "placeholder": "Learning objectives: 1. Understand and implement classes in object-oriented programming",
3008
+ "rows": null,
3009
+ "style": "IPY_MODEL_54e3918921f44fb4a9020beab951fcdf",
3010
+ "value": ""
3011
+ }
3012
+ },
3013
+ "df1c46361f714aceb9c046f98fede40c": {
3014
+ "model_module": "@jupyter-widgets/controls",
3015
+ "model_module_version": "1.5.0",
3016
+ "model_name": "ButtonStyleModel",
3017
+ "state": {
3018
+ "_model_module": "@jupyter-widgets/controls",
3019
+ "_model_module_version": "1.5.0",
3020
+ "_model_name": "ButtonStyleModel",
3021
+ "_view_count": null,
3022
+ "_view_module": "@jupyter-widgets/base",
3023
+ "_view_module_version": "1.2.0",
3024
+ "_view_name": "StyleView",
3025
+ "button_color": null,
3026
+ "font_weight": ""
3027
+ }
3028
+ },
3029
+ "dfa8d6c7d70b42468cbda035de89404c": {
3030
+ "model_module": "@jupyter-widgets/base",
3031
+ "model_module_version": "1.2.0",
3032
+ "model_name": "LayoutModel",
3033
+ "state": {
3034
+ "_model_module": "@jupyter-widgets/base",
3035
+ "_model_module_version": "1.2.0",
3036
+ "_model_name": "LayoutModel",
3037
+ "_view_count": null,
3038
+ "_view_module": "@jupyter-widgets/base",
3039
+ "_view_module_version": "1.2.0",
3040
+ "_view_name": "LayoutView",
3041
+ "align_content": null,
3042
+ "align_items": null,
3043
+ "align_self": null,
3044
+ "border": null,
3045
+ "bottom": null,
3046
+ "display": null,
3047
+ "flex": null,
3048
+ "flex_flow": null,
3049
+ "grid_area": null,
3050
+ "grid_auto_columns": null,
3051
+ "grid_auto_flow": null,
3052
+ "grid_auto_rows": null,
3053
+ "grid_column": null,
3054
+ "grid_gap": null,
3055
+ "grid_row": null,
3056
+ "grid_template_areas": null,
3057
+ "grid_template_columns": null,
3058
+ "grid_template_rows": null,
3059
+ "height": null,
3060
+ "justify_content": null,
3061
+ "justify_items": null,
3062
+ "left": null,
3063
+ "margin": null,
3064
+ "max_height": null,
3065
+ "max_width": null,
3066
+ "min_height": null,
3067
+ "min_width": null,
3068
+ "object_fit": null,
3069
+ "object_position": null,
3070
+ "order": null,
3071
+ "overflow": null,
3072
+ "overflow_x": null,
3073
+ "overflow_y": null,
3074
+ "padding": null,
3075
+ "right": null,
3076
+ "top": null,
3077
+ "visibility": null,
3078
+ "width": null
3079
+ }
3080
+ },
3081
+ "e30fe87f01bc4580a61713b5b72439a2": {
3082
+ "model_module": "@jupyter-widgets/base",
3083
+ "model_module_version": "1.2.0",
3084
+ "model_name": "LayoutModel",
3085
+ "state": {
3086
+ "_model_module": "@jupyter-widgets/base",
3087
+ "_model_module_version": "1.2.0",
3088
+ "_model_name": "LayoutModel",
3089
+ "_view_count": null,
3090
+ "_view_module": "@jupyter-widgets/base",
3091
+ "_view_module_version": "1.2.0",
3092
+ "_view_name": "LayoutView",
3093
+ "align_content": null,
3094
+ "align_items": "stretch",
3095
+ "align_self": null,
3096
+ "border": "solid 1px gray",
3097
+ "bottom": null,
3098
+ "display": "flex",
3099
+ "flex": null,
3100
+ "flex_flow": "column",
3101
+ "grid_area": null,
3102
+ "grid_auto_columns": null,
3103
+ "grid_auto_flow": null,
3104
+ "grid_auto_rows": null,
3105
+ "grid_column": null,
3106
+ "grid_gap": null,
3107
+ "grid_row": null,
3108
+ "grid_template_areas": null,
3109
+ "grid_template_columns": null,
3110
+ "grid_template_rows": null,
3111
+ "height": null,
3112
+ "justify_content": null,
3113
+ "justify_items": null,
3114
+ "left": null,
3115
+ "margin": null,
3116
+ "max_height": null,
3117
+ "max_width": null,
3118
+ "min_height": null,
3119
+ "min_width": null,
3120
+ "object_fit": null,
3121
+ "object_position": null,
3122
+ "order": null,
3123
+ "overflow": null,
3124
+ "overflow_x": null,
3125
+ "overflow_y": null,
3126
+ "padding": "0px 30px 20px 30px",
3127
+ "right": null,
3128
+ "top": null,
3129
+ "visibility": null,
3130
+ "width": "50%"
3131
+ }
3132
+ },
3133
+ "f7d75b0a32554a9589c513336fc30095": {
3134
+ "model_module": "@jupyter-widgets/controls",
3135
+ "model_module_version": "1.5.0",
3136
+ "model_name": "DescriptionStyleModel",
3137
+ "state": {
3138
+ "_model_module": "@jupyter-widgets/controls",
3139
+ "_model_module_version": "1.5.0",
3140
+ "_model_name": "DescriptionStyleModel",
3141
+ "_view_count": null,
3142
+ "_view_module": "@jupyter-widgets/base",
3143
+ "_view_module_version": "1.2.0",
3144
+ "_view_name": "StyleView",
3145
+ "description_width": "initial"
3146
+ }
3147
+ }
3148
+ }
3149
+ }
3150
+ },
3151
+ "nbformat": 4,
3152
+ "nbformat_minor": 0
3153
+ }
lo-achievement/instructor_intr_notebook_example_training.ipynb ADDED
@@ -0,0 +1,1277 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "id": "view-in-github",
7
+ "colab_type": "text"
8
+ },
9
+ "source": [
10
+ "<a href=\"https://colab.research.google.com/github/vanderbilt-data-science/lo-achievement/blob/adding_grading_levels_to_instructor_nb/instructor_intr_notebook_example_training.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "markdown",
15
+ "metadata": {
16
+ "id": "WMKrKfx8_3fc"
17
+ },
18
+ "source": [
19
+ "# Instructor Grading and Assessment\n",
20
+ "This notebook executes grading of student submissions based on the examples provided in the [Wiki](https://github.com/vanderbilt-data-science/lo-achievement/wiki/Examples-of-great,-good,-and-poor-answers-to-questions) from Dr. Jesse Blocher. In this iteration, we use the Unstructured File Loader, which cannot process .json files (the preferred format). We are working on finding a file loader that supports .json. In this version of the notebook, the model has only been trained on Question 2 from the notebook.\n",
21
+ "\n",
22
+ "To train the model, we used two of the three student examples from each grade bracket and input them into a .pdf with clearly defined levels. Then, we used the excluded answers to test the accuracy of the model's grading."
23
+ ]
24
+ },
25
+ {
26
+ "cell_type": "markdown",
27
+ "source": [
28
+ "# Load and Install Necessary Libraries"
29
+ ],
30
+ "metadata": {
31
+ "id": "2UQgQSoMx4My"
32
+ }
33
+ },
34
+ {
35
+ "cell_type": "code",
36
+ "source": [
37
+ "! pip install -q langchain=='0.0.229' openai gradio numpy chromadb tiktoken unstructured pdf2image pydantic==\"1.10.8\" jq"
38
+ ],
39
+ "metadata": {
40
+ "id": "UJi1Oy0CyPHD"
41
+ },
42
+ "execution_count": null,
43
+ "outputs": []
44
+ },
45
+ {
46
+ "cell_type": "code",
47
+ "source": [
48
+ "# import necessary libraries here\n",
49
+ "from getpass import getpass\n",
50
+ "from langchain.llms import OpenAI as openai\n",
51
+ "from langchain.chat_models import ChatOpenAI\n",
52
+ "from langchain.prompts import PromptTemplate\n",
53
+ "from langchain.document_loaders import TextLoader\n",
54
+ "from langchain.indexes import VectorstoreIndexCreator\n",
55
+ "from langchain.text_splitter import CharacterTextSplitter\n",
56
+ "from langchain.embeddings import OpenAIEmbeddings\n",
57
+ "from langchain.schema import SystemMessage, HumanMessage, AIMessage\n",
58
+ "import numpy as np\n",
59
+ "import os\n",
60
+ "from langchain.vectorstores import Chroma\n",
61
+ "from langchain.document_loaders.unstructured import UnstructuredFileLoader\n",
62
+ "from langchain.document_loaders import UnstructuredFileLoader\n",
63
+ "from langchain.chains import VectorDBQA\n",
64
+ "from langchain.document_loaders import JSONLoader\n",
65
+ "import json\n",
66
+ "from pathlib import Path\n",
67
+ "from pprint import pprint\n",
68
+ "\n",
69
+ "\n",
70
+ "from langchain.prompts.few_shot import FewShotPromptTemplate\n",
71
+ "from langchain.prompts.prompt import PromptTemplate"
72
+ ],
73
+ "metadata": {
74
+ "id": "YHytCUoExrYe"
75
+ },
76
+ "execution_count": 2,
77
+ "outputs": []
78
+ },
79
+ {
80
+ "cell_type": "markdown",
81
+ "source": [
82
+ "# Set up model and pass OpenAI Key\n",
83
+ "Here we set up the model and use a system message to pass a persona prompt containing the grading advice."
84
+ ],
85
+ "metadata": {
86
+ "id": "4elyN72szz-_"
87
+ }
88
+ },
89
+ {
90
+ "cell_type": "code",
91
+ "source": [
92
+ "# setup open AI api key\n",
93
+ "openai_api_key = getpass()"
94
+ ],
95
+ "metadata": {
96
+ "id": "jVPEFX3ixJnM"
97
+ },
98
+ "execution_count": null,
99
+ "outputs": []
100
+ },
101
+ {
102
+ "cell_type": "code",
103
+ "source": [
104
+ "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n",
105
+ "openai.api_key = openai_api_key"
106
+ ],
107
+ "metadata": {
108
+ "id": "obplpeB78h_M"
109
+ },
110
+ "execution_count": null,
111
+ "outputs": []
112
+ },
113
+ {
114
+ "cell_type": "code",
115
+ "source": [
116
+ "# Initiate model (model type and specify persona)\n",
117
+ "llm = ChatOpenAI(model='gpt-3.5-turbo-16k')\n",
118
+ "messages = [\n",
119
+ " SystemMessage(content=\"You are a helpful grading assistant. In grading the following questions, keep in mind the advice from the professor: one aspect of it was being specific. The poor answers have a lot of platitudes, the better answers give specific examples. Secondly, they should discuss automation and/or prediction specifically. Those are the things that ML does, it is not 'technology' broadly.\"),\n",
120
+ " HumanMessage(content=\"\")\n",
121
+ "]"
122
+ ],
123
+ "metadata": {
124
+ "id": "e_ZavOnS8iuE"
125
+ },
126
+ "execution_count": null,
127
+ "outputs": []
128
+ },
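+ {
+ "cell_type": "code",
+ "source": [
+ "# Optional sketch (not part of the original notebook): one way to send the\n",
+ "# persona system message plus a student answer to the chat model and read its\n",
+ "# reply. sample_answer is a hypothetical placeholder, not one of the Wiki examples.\n",
+ "sample_answer = 'Machine learning lets firms automate prediction over large datasets at low cost.'\n",
+ "grading_messages = [messages[0], HumanMessage(content=f'Assign a letter grade (A, B, or C) to this answer and explain briefly: {sample_answer}')]\n",
+ "response = llm(grading_messages)\n",
+ "print(response.content)"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },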
129
+ {
130
+ "cell_type": "markdown",
131
+ "source": [
132
+ "# Original route: Vector Stores from .json files\n",
133
+ "This section uses .json files and vector stores with system/human messaging and a persona prompt tailored to Dr. Blocher's grading philosophy for the Wiki examples (only includes section 2 at this time)."
134
+ ],
135
+ "metadata": {
136
+ "id": "KHS3E6PydN-2"
137
+ }
138
+ },
139
+ {
140
+ "cell_type": "markdown",
141
+ "source": [
142
+ "## Grading based on A, B, and C-level answers from previous students to Question 2 from the [Wiki](https://github.com/vanderbilt-data-science/lo-achievement/wiki/Examples-of-great,-good,-and-poor-answers-to-questions):\n",
143
+ "\n",
144
+ "**Question 2:** Why is machine learning so important for businesses? Answer this question generally (i.e. such that it applies to many or at least most businesses)."
145
+ ],
146
+ "metadata": {
147
+ "id": "IYCBSD_8l7uu"
148
+ }
149
+ },
150
+ {
151
+ "cell_type": "markdown",
152
+ "source": [
153
+ "### Creating .json file from case examples (Question 2)\n",
154
+ "The purpose of this cell is to create a .json file from the previously submitted, graded student work in the case file provided by Dr. Blocher in the Wiki. You could create your own file here, or, for quick demo purposes, use the zip file under the next section heading."
155
+ ],
156
+ "metadata": {
157
+ "id": "TYlGEusr64kA"
158
+ }
159
+ },
160
+ {
161
+ "cell_type": "code",
162
+ "source": [
163
+ "q2 = 'Question 2: Why is machine learning so important for businesses? Answer this question generally (i.e. such that it applies to many or at least most businesses).'"
164
+ ],
165
+ "metadata": {
166
+ "id": "DAIaKTRYxOlh"
167
+ },
168
+ "execution_count": null,
169
+ "outputs": []
170
+ },
171
+ {
172
+ "cell_type": "code",
173
+ "source": [
174
+ "# A-level answers\n",
175
+ "\n",
176
+ "q2_A_answer_1 = 'Machine learning is extremely important tool for businesses. It can be used in a variety of ways, but most importantly, it can be used to identify patterns within their data that might not otherwise be identified by human beings. For example, it can understand customer behaviors, optimize logistics, and expand efficiencies throughout the business. Machine learning does not get tired, meaning it can work as long as you want it to. It can sift through massive amounts of data, that no human being can look through in an efficient manner. Machine learning can be used as a tool to identify anomalies when something needs to be checked to save or gain money. The predictions that companies gain from machine learning are cheap, accurate, and automate. These machine learning algorithms can be brought to larger scales to encompass the whole business and its operations. It is important to note, Machine learning is just predictions. Predictions to understand important patterns that could make or break a company since they understand the patterns of their business more. It is an amazing tool, but should be used wisely and carefully because if not, it can expensive, useless, and straight up wrong.'\n",
177
+ "q2_A_answer_2 = 'Machine learning is important for most of the sectors in business. Overall, it gives the company of an overview about what would be the trend for their business industry, and analyze the customer behavior to help business segment their customers groups. Today, many companies have a vast amount of information generated by behavior, computer, events, people, and devices. This massive amount of data is difficult for human to handle, and even if human manages it, it is not profitable as human labor is expensive. Thanks to machine learning, companies can utilize their in-house or even third-party data to make something useful for their business. In medical analysis, for example, with human, it takes a very long time to find patterns in thousands of MRI scans. On the other hand, machines can detect patterns in seconds by entering data as long as the information is correctly labeled or trained properly. Another example would be segmenting customer group. In marketing department, the business could use unsupervised machine learning to cluster their customer segments to generate personalized contents that are relevant for each of individuals.'\n",
178
+ "\n",
179
+ "# List creation\n",
180
+ "\n",
181
+ "q2_A_answers_list = [q2_A_answer_1, q2_A_answer_2]\n"
182
+ ],
183
+ "metadata": {
184
+ "id": "yQT6aExSr1dP"
185
+ },
186
+ "execution_count": 9,
187
+ "outputs": []
188
+ },
189
+ {
190
+ "cell_type": "code",
191
+ "source": [
192
+ "# B-level answers\n",
193
+ "\n",
194
+ "q2_B_answer_1 = 'Companies use ML models to improve different aspects of their business, like manufacturing, hiring, deployment, advertising, etc. The main goal is to improve productive and increase profitability of the company. The ML models are fed with company and externally available data to help the company optimize its departments and in turn become more financially successful/ productive. For example, using purchasing history, the company can predict who to advertise products to, to increase sales.'\n",
195
+ "q2_B_answer_2 = 'Machine learning allows business to have automated decision, scale, predictive analysis and performance. Machine learning also helps a business have a data strategy. This is how a firm uses data, data infrastructure, governance, etc. to accomplish its strategic goals and maintain/grow their competitive advantage within their industry.'\n",
196
+ "q2_B_answer_3 = 'The short answer is ML can help make decisions for businesses. To be clarified, ML does not make decisions for businesses. I mean it can, but people have not trusted ML enough yet and ML has not been that good to let it directly make business decisions. Business people only use ML to help themselves get intuitions of how decisions should be made and make predictions of results they might get based on their decisions. For example, if a business tries to launch a new product, it will use ML to test whether it will work or not on a small scale before it is introduced to a large scale. People called this step piloting. In this step, people collect data that is generated by using the pilot product and analyze their next move. They could tweak some features based on the feedback. If they think the data in their interest shows the product performs well, they might predict that this product will be successful when it is introduced on a large scale. Then, they will launch it.'\n",
197
+ "# List creation\n",
198
+ "\n",
199
+ "q2_B_answers_list = [q2_B_answer_1, q2_B_answer_2, q2_B_answer_3]"
200
+ ],
201
+ "metadata": {
202
+ "id": "KB1CmeRwtRvf"
203
+ },
204
+ "execution_count": 10,
205
+ "outputs": []
206
+ },
207
+ {
208
+ "cell_type": "code",
209
+ "source": [
210
+ "# C-level answers\n",
211
+ "\n",
212
+ "q2_C_answer_1 = 'Machine learning powers many of the services we use today, such as the recommendation systems of Spotify and Netflix; search engines such as Google and Bing; social media such as TikTok and Instagram; voices such as Siri and Alexa, the list can go on. All these examples show that machine learning is already starting to play a pivotal role in today\"s data-rich world. Machines can help us sift through useful information that can lead to big breakthroughs, and we have seen the widespread use of this technology in various industries such as finance, healthcare, insurance, manufacturing, transformational change, etc.'\n",
213
+ "q2_C_answer_2 = 'As technology advanced, there are tons of new data generated and stored. All industries experienced this surge in data, including business. There is a huge amount of business data stored and waited in the database of each firm and they need solutions to utilize these data. Machine learning is a very promising approach for firms to puts these data in and output a meaning pattern or result that could help the firms with their existing work. This could turn into a working product or provide insights that could enhance the efficiency of the company’s workflow. With machine learning, a firm could either enter a new market with the new product or save time and effort with the meaningful insights. Achieving these possibilities with the data they already owned is a low effort but high reward action. This is the reason machine learning is valued by many businesses recently.'\n",
214
+ "\n",
215
+ "# List creation\n",
216
+ "\n",
217
+ "q2_C_answers_list = [q2_C_answer_1, q2_C_answer_2]"
218
+ ],
219
+ "metadata": {
220
+ "id": "3diAz43othjc"
221
+ },
222
+ "execution_count": 11,
223
+ "outputs": []
224
+ },
225
+ {
226
+ "cell_type": "code",
227
+ "source": [
228
+ "q2_Q_and_A = [\"Question:\", q2, \"A-level Answers\", q2_A_answers_list, \"B-level Answers\", q2_B_answers_list, \"C-level Answers\", q2_C_answers_list]"
229
+ ],
230
+ "metadata": {
231
+ "id": "oAnrMSU6u9do"
232
+ },
233
+ "execution_count": null,
234
+ "outputs": []
235
+ },
236
+ {
237
+ "cell_type": "code",
238
+ "source": [
239
+ "import json\n",
240
+ "from google.colab import files\n",
241
+ "\n",
242
+ "def save_example_answers(examples, filename='wiki_ABC_Q2examples.json'):\n",
243
+ " with open(filename, 'w') as file:\n",
244
+ " json.dump(examples, file)\n",
245
+ " files.download(filename)\n",
246
+ "\n",
247
+ "save_example_answers(q2_Q_and_A)"
248
+ ],
249
+ "metadata": {
250
+ "colab": {
251
+ "base_uri": "https://localhost:8080/",
252
+ "height": 17
253
+ },
254
+ "id": "B16iYMEnri9s",
255
+ "outputId": "3e565b3d-804c-4b5e-acc8-efeb955c6c14"
256
+ },
257
+ "execution_count": null,
258
+ "outputs": [
259
+ {
260
+ "output_type": "display_data",
261
+ "data": {
262
+ "text/plain": [
263
+ "<IPython.core.display.Javascript object>"
264
+ ],
265
+ "application/javascript": [
266
+ "\n",
267
+ " async function download(id, filename, size) {\n",
268
+ " if (!google.colab.kernel.accessAllowed) {\n",
269
+ " return;\n",
270
+ " }\n",
271
+ " const div = document.createElement('div');\n",
272
+ " const label = document.createElement('label');\n",
273
+ " label.textContent = `Downloading \"${filename}\": `;\n",
274
+ " div.appendChild(label);\n",
275
+ " const progress = document.createElement('progress');\n",
276
+ " progress.max = size;\n",
277
+ " div.appendChild(progress);\n",
278
+ " document.body.appendChild(div);\n",
279
+ "\n",
280
+ " const buffers = [];\n",
281
+ " let downloaded = 0;\n",
282
+ "\n",
283
+ " const channel = await google.colab.kernel.comms.open(id);\n",
284
+ " // Send a message to notify the kernel that we're ready.\n",
285
+ " channel.send({})\n",
286
+ "\n",
287
+ " for await (const message of channel.messages) {\n",
288
+ " // Send a message to notify the kernel that we're ready.\n",
289
+ " channel.send({})\n",
290
+ " if (message.buffers) {\n",
291
+ " for (const buffer of message.buffers) {\n",
292
+ " buffers.push(buffer);\n",
293
+ " downloaded += buffer.byteLength;\n",
294
+ " progress.value = downloaded;\n",
295
+ " }\n",
296
+ " }\n",
297
+ " }\n",
298
+ " const blob = new Blob(buffers, {type: 'application/binary'});\n",
299
+ " const a = document.createElement('a');\n",
300
+ " a.href = window.URL.createObjectURL(blob);\n",
301
+ " a.download = filename;\n",
302
+ " div.appendChild(a);\n",
303
+ " a.click();\n",
304
+ " div.remove();\n",
305
+ " }\n",
306
+ " "
307
+ ]
308
+ },
309
+ "metadata": {}
310
+ },
311
+ {
312
+ "output_type": "display_data",
313
+ "data": {
314
+ "text/plain": [
315
+ "<IPython.core.display.Javascript object>"
316
+ ],
317
+ "application/javascript": [
318
+ "download(\"download_8ec6f24c-9653-4335-8c1b-766926434399\", \"wiki_ABC_Q2examples.json\", 5931)"
319
+ ]
320
+ },
321
+ "metadata": {}
322
+ }
323
+ ]
324
+ },
325
+ {
326
+ "cell_type": "markdown",
327
+ "source": [
328
+ "### Creating a Vector Store\n",
329
+ "Here we create a vector store based on the .json file (available at this [link](https://drive.google.com/file/d/1nk6JhbqoUHFie-Ewb436pdV-onlObZUt/view?usp=sharing); you will need to unzip it)."
330
+ ],
331
+ "metadata": {
332
+ "id": "exXR-A2oxWeg"
333
+ }
334
+ },
335
+ {
336
+ "cell_type": "code",
337
+ "source": [
338
+ "# Upload .json or zip\n",
339
+ "from google.colab import files\n",
340
+ "uploaded = files.upload()"
341
+ ],
342
+ "metadata": {
343
+ "id": "Wpt6qsmEw8WP"
344
+ },
345
+ "execution_count": null,
346
+ "outputs": []
347
+ },
348
+ {
349
+ "cell_type": "code",
350
+ "source": [
351
+ "# Unzip file if necessary\n",
352
+ "!unzip '/content/wiki_ABC_Q2examples (2).json.zip'"
353
+ ],
354
+ "metadata": {
355
+ "id": "7T2LpkiZh9LT"
356
+ },
357
+ "execution_count": null,
358
+ "outputs": []
359
+ },
360
+ {
361
+ "cell_type": "code",
362
+ "source": [
363
+ "file_path = '/content/wiki_ABC_Q2examples (2).json'"
364
+ "data = '/content/wiki_ABC_Q2examples (2).json'"
365
+ ],
366
+ "metadata": {
367
+ "id": "CVY0CVvhxyCu"
368
+ },
369
+ "execution_count": null,
370
+ "outputs": []
371
+ },
372
+ {
373
+ "cell_type": "code",
374
+ "source": [
375
+ "# Load the .json\n",
376
+ "data = json.loads(Path(file_path).read_text())\n",
377
+ "data = str(data)"
378
+ ],
379
+ "metadata": {
380
+ "id": "dHjK0nN6yYPH"
381
+ },
382
+ "execution_count": null,
383
+ "outputs": []
384
+ },
385
+ {
386
+ "cell_type": "code",
387
+ "source": [
388
+ "# Create Vector Store\n",
389
+ "\n",
390
+ "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
391
+ "texts = text_splitter.split_text(data)\n",
392
+ "\n",
393
+ "embeddings = OpenAIEmbeddings()\n",
394
+ "\n",
395
+ "db = Chroma.from_texts(texts, embeddings)\n",
396
+ "\n",
397
+ "qa = VectorDBQA.from_chain_type(llm=llm, chain_type=\"stuff\", vectorstore=db, k=1)"
398
+ ],
399
+ "metadata": {
400
+ "id": "paibxyeuxnu1"
401
+ },
402
+ "execution_count": null,
403
+ "outputs": []
404
+ },
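+ {
+ "cell_type": "code",
+ "source": [
+ "# Optional sanity check (not part of the original notebook): confirm that the\n",
+ "# rubric text saved to the Chroma store can be retrieved before grading.\n",
+ "retrieved = db.similarity_search(q2, k=1)\n",
+ "print(retrieved[0].page_content[:500])"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },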
405
+ {
406
+ "cell_type": "code",
407
+ "source": [
408
+ "# Creating examples from student answers to test the Wiki\n",
409
+ "\n",
410
+ "example1 = 'The most powerful aspect of machine learning is its ability to automate processes. If the goal is well defined and repeatable, an algorithm can be trained to perform that task far faster than any human and often with more reliability. Because of this, businesses can implement machine learning algorithms to solve problems human labor was previously required for, whether that be mental or physical labor. Although implementing machine learning often has high initial costs, because computers do not require payment outside of maintaining their operation, in the long run companies can save money by either making their workers’ tasks more efficient or by entirely automating tasks. This increases the profit margins for those firms.'\n",
411
+ "# B level\n",
412
+ "example2 ='ML systems can help improve access to data while managing compliance. It helps businesspeople deal with data in a more efficient way, since there is numerous data generated per second in such industry. If they use some of the simple classification models with high accuracy, it will save them a lot of time on data cleaning and validation, which are kind of repetitive and time-consuming. For example, NLP provides an easy way for personnel to query business information, understand business processes, and discover new relationships between business data, ideas based on intuition and insight often emerge. Using models to do prediction helps people making wiser decision. Since models can handle way more data than human, newly collected data can feed in the model and get some predictive result as a reference to the decision makers. This is significant because in this industry, time is precious, and traders must decide quickly and precisely. A little negligence will lead to a big mistake, lose a lot of money, and even affect the company\"s reputation. Models can see patterns that are not easy for human to spot, which is also valuable for modify the way people doing analysis and interpret.'\n",
413
+ "# C level\n",
414
+ "example3 = 'The machine learning model (or one a broader view, artificial intelligence) is about prediction. According to the lecture, there are tree main innovations in it. Prediction is cheap, more accurate and automated. As a result, armed with machine learning, businesses could automatically gain much more accurate and powerful capabilities in forecasting, leading to a big savings both in time and money.'\n",
415
+ "\n",
416
+ "# Randomized list of answers\n",
417
+ "training_answers = [example2, example1, example3]"
418
+ ],
419
+ "metadata": {
420
+ "id": "-fyY-ftV-ZbG"
421
+ },
422
+ "execution_count": null,
423
+ "outputs": []
424
+ },
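+ {
+ "cell_type": "code",
+ "source": [
+ "# Optional sketch (not part of the original notebook): grade one of the held-out\n",
+ "# answers against the stored rubric via the VectorDBQA chain. The wording of the\n",
+ "# grading instruction below is an assumption, not the original course prompt.\n",
+ "grading_query = q2 + ' Student answer: ' + training_answers[0] + ' Using the graded example answers as a rubric, assign a letter grade (A, B, or C) and briefly justify it.'\n",
+ "print(qa.run(grading_query))"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },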
425
+ {
426
+ "cell_type": "markdown",
427
+ "source": [
428
+ "## Grading based on A, B, and C-level answers from previous students to Question 4 from the [Wiki](https://github.com/vanderbilt-data-science/lo-achievement/wiki/Examples-of-great,-good,-and-poor-answers-to-questions):\n",
429
+ "\n",
430
+ "**Question 4:** Describe an example where machine learning is not likely to work well. This should be an example where intelligent, rational people could think of using machine learning or even are already implementing it. Describe specifically why you think ML is unlikely to work well. This should not be an example where someone is implementing it badly (i.e. skipping testing). Rather, it should be an example of how ML simply will not work well, no matter how many smart people work carefully on the problem. You may use examples we’ve discussed in class; it does not need to be a novel application."
431
+ ],
432
+ "metadata": {
433
+ "id": "sSnxqnGDmPzC"
434
+ }
435
+ },
436
+ {
437
+ "cell_type": "markdown",
438
+ "source": [
439
+ "### Creating .json file from case examples (Question 4)\n",
440
+ "The purpose of this cell is to create a .json file from the previously submitted, graded student work in the case file provided by Dr. Blocher in the Wiki."
441
+ ],
442
+ "metadata": {
443
+ "id": "RMkTwaEFmhWd"
444
+ }
445
+ },
446
+ {
447
+ "cell_type": "code",
448
+ "source": [
449
+ "# Context: question\n",
450
+ "\n",
451
+ "q4 = 'Question 4: Describe an example where machine learning is not likely to work well. This should be an example where intelligent, rational people could think of using machine learning or even are already implementing it. Describe specifically why you think ML is unlikely to work well. This should not be an example where someone is implementing it badly (i.e. skipping testing). Rather, it should be an example of how ML simply will not work well, no matter how many smart people work carefully on the problem. You may use examples we’ve discussed in class; it does not need to be a novel application.'"
452
+ ],
453
+ "metadata": {
454
+ "id": "_UVEgvzKmltw"
455
+ },
456
+ "execution_count": null,
457
+ "outputs": []
458
+ },
459
+ {
460
+ "cell_type": "code",
461
+ "source": [
462
+ "# A-level answers\n",
463
+ "\n",
464
+ "q4_A_answer_1 = 'This is one of the cases discussed in class: Machine learning on the management of at-risk children. Traditionally, at-risk children are mostly determined by social workers and they would report to their agencies if some children are at-risk. The at-risk children will subsequentially be taken into custody before their circumstance is evaluated and a decision about their future is reached. If a machine-learning algorithm were to replace the aforementioned social worker: the biggest concern would be when the algorithm outputs a false positive or false negative prediction. A false positive means that the children are said to be at risk while actually not and removed from their households. A false negative would be when some children are not removed from the dangerous household because the algorithm returned negative results. The costs of both incorrect decisions are too big for anyone to afford. Admittedly, data scientists might be able to develop a great algorithm with very low false positive and false negative rates, and that social workers can easily make the same amount of mistakes or more, but it is still too much for our humane society to take by putting such a life-changing or even life-or-death decision in a machine’s metaphorical hands.'\n",
465
+ "q4_A_answer_2 = 'I believe machine learning will not work well in the court system when making sentences and decisions. I have heard in the news that machine learning was helping judges sentence defenders. Currently, I am unaware if this is still happening but this will not work because our court system has a racial basis. This racial bias stems back hundreds of years and a machine-learning model would exploit the discrimination already so present in our system. The dataset used to train the model has bias that would unfairly sentence a person because of their demographic. This overall is bad because there is no dataset you can use to train a model that already does not include some sort of inherited bias.'\n",
466
+ "\n",
467
+ "# List creation\n",
468
+ "\n",
469
+ "q4_A_answers_list = [q4_A_answer_1, q4_A_answer_2]\n"
470
+ ],
471
+ "metadata": {
472
+ "id": "Irw0U9PBmupJ"
473
+ },
474
+ "execution_count": null,
475
+ "outputs": []
476
+ },
477
+ {
478
+ "cell_type": "code",
479
+ "source": [
480
+ "# B-level answers\n",
481
+ "\n",
482
+ "q4_B_answer_1 = 'I believe that machine learning won’t work well in the advent of self-driving cars. As discussed in class, machines are bad at “analogies, small data or rare events, and identifying causality” (Lecture 4: Data Strategy.) While most driving occurrences are habitual and can be predicted, especially day-to-day events like driving to work or the grocery store, so much driving is small or rare events that can’t necessarily be predicted. For example, you can’t predict that the person behind you is texting while driving and is going to rear end you, or that a snowstorm can make you start to hydroplane on the road. So much of the process of learning is understanding the exceptions, such as driving in the rain, at night, or almost avoiding an accident. For self-driving cars to be successful, so much data is required to reduce error, which I don’t think is possible to acquire. While most algorithms can have leeway in their accuracy, self-driving cars cannot have any errors — a difficult feat.'\n",
483
+ "q4_B_answer_2 = 'As we discussed in class, in a hospital setting, ML cannot do the task of diagnosing medical conditions well. While ML have been used in some diagnostic tasks, such as detecting some certain types of cancer, it generally to be considered difficult and will not be used in a large scale because medical conditions can have many different conditions can have many different symptoms and can be difficult to diagnose even for experienced doctors. The training of a machine learning model for diagnosing medical conditions requires a large dataset of medical records, along with labels indicating each diagnosis. But there are several barriers. First, medical records are often private and sensitive, and sharing them may raise legal and ethical concerns. Second, medical conditions are complicated, it’s hard to ensure the labels are correct, some of which may takes years or decades to testify. Even if they can recognize some pattern for some disease, symptoms vary from person to person. At a first glance, even after running some tests, it’s hard for doctors to tell the case. It will be harder to use ML in diagnose in general.'\n",
484
+ "\n",
485
+ "q4_B_answers_list = [q4_B_answer_1, q4_B_answer_2]"
486
+ ],
487
+ "metadata": {
488
+ "id": "haKh_FsenmT3"
489
+ },
490
+ "execution_count": null,
491
+ "outputs": []
492
+ },
493
+ {
494
+ "cell_type": "code",
495
+ "source": [
496
+ "# C-level answers\n",
497
+ "\n",
498
+ "q4_C_answer_1 = 'One example would be unstable models. Some of the models frequently exhibit extreme instability and degrade over time. In such circumstances, the company can call for model monitoring and high-frequency model revision. Businesses may begin to revert to intuition-based approach when model creation lead times increase.'\n",
499
+ "q4_C_answer_2 = 'The example I want to describe is about GPS. We have heard that some drivers who blindly followed their GPS into disaster. The news reported that Three tourists from Bellevue, Washington, got lost after midnight in 2011 when they were unable to find their way back to the hotel. They took what they believed to be a road that would take them to the freeway after requesting the GPS to reroute them. Instead, their SUV submerged into a large body of water. These people trust algorithms and data more than their own logic and judgement. People who drive into the water just because they believe too much in GPS. Algorithms will never think like a human being and cannot make moral judgement like a human being. Machine learning is unable to provide any guidance on the accepted norms. Machine minds & human minds could never be the same. Everyone’s answer is all different, We cannot collect everyone’s information around the world, that is so hard. As long as there is one person’s information not counted, then it will have difference from what you predict. The world always require human’s judgement and ethics, that is the point can not be changed.'\n",
500
+ "q4_C_answer_3 = 'The organization and the abundance of your data is paramount in the applying machine learning. When your data are either incomplete, like not enough data, or unstructured, like messy data with a lot of missingness and inconsistencies, you are not likely to obtain a good result even from implementing the best model in the world. There are some techniques that you might be able to amplify your data or transfer other data into your own, such as transfer learning or domain adaptation. However, it is still strongly recommended that you should have plenty of data to perform machine learning on. Your data should also be able to well represent the question you are trying to answer. Low coverage of the true target population is likely to result in skewness of the result. Machines learning is not likely to work well in this scenario.'\n",
501
+ "\n",
502
+ "# List creation\n",
503
+ "\n",
504
+ "q4_C_answers_list = [q4_C_answer_1, q4_C_answer_2, q4_C_answer_3]"
505
+ ],
506
+ "metadata": {
507
+ "id": "YNHULek7oCE_"
508
+ },
509
+ "execution_count": null,
510
+ "outputs": []
511
+ },
512
+ {
513
+ "cell_type": "code",
514
+ "source": [
515
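+ "# Flat list interleaving section labels with the answer lists; this is the grading context saved to JSON below.\n",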
+ "q4_Q_and_A = [\"Question:\", q4, \"A-level Answers\", q4_A_answers_list, \"B-level Answers\", q4_B_answers_list, \"C-level Answers\", q4_C_answers_list]"
516
+ ],
517
+ "metadata": {
518
+ "id": "9bXSVmPnojIh"
519
+ },
520
+ "execution_count": null,
521
+ "outputs": []
522
+ },
523
+ {
524
+ "cell_type": "code",
525
+ "source": [
526
+ "import json\n",
527
+ "from google.colab import files\n",
528
+ "\n",
529
+ "def save_example_answers(examples, filename='wiki_ABC_Q4examples.json'):\n",
530
+ " with open(filename, 'w') as file:\n",
531
+ " json.dump(examples, file)\n",
532
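+ " # files.download triggers a browser download; it only works when this notebook is run in Google Colab.\n",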
+ " files.download(filename)\n",
533
+ "\n",
534
+ "save_example_answers(q4_Q_and_A)"
535
+ ],
536
+ "metadata": {
537
+ "colab": {
538
+ "base_uri": "https://localhost:8080/",
539
+ "height": 17
540
+ },
541
+ "outputId": "3e565b3d-804c-4b5e-acc8-efeb955c6c14",
542
+ "id": "Qqwk0mvroqWz"
543
+ },
544
+ "execution_count": null,
545
+ "outputs": [
546
+ {
547
+ "output_type": "display_data",
548
+ "data": {
549
+ "text/plain": [
550
+ "<IPython.core.display.Javascript object>"
551
+ ],
552
+ "application/javascript": [
553
+ "\n",
554
+ " async function download(id, filename, size) {\n",
555
+ " if (!google.colab.kernel.accessAllowed) {\n",
556
+ " return;\n",
557
+ " }\n",
558
+ " const div = document.createElement('div');\n",
559
+ " const label = document.createElement('label');\n",
560
+ " label.textContent = `Downloading \"${filename}\": `;\n",
561
+ " div.appendChild(label);\n",
562
+ " const progress = document.createElement('progress');\n",
563
+ " progress.max = size;\n",
564
+ " div.appendChild(progress);\n",
565
+ " document.body.appendChild(div);\n",
566
+ "\n",
567
+ " const buffers = [];\n",
568
+ " let downloaded = 0;\n",
569
+ "\n",
570
+ " const channel = await google.colab.kernel.comms.open(id);\n",
571
+ " // Send a message to notify the kernel that we're ready.\n",
572
+ " channel.send({})\n",
573
+ "\n",
574
+ " for await (const message of channel.messages) {\n",
575
+ " // Send a message to notify the kernel that we're ready.\n",
576
+ " channel.send({})\n",
577
+ " if (message.buffers) {\n",
578
+ " for (const buffer of message.buffers) {\n",
579
+ " buffers.push(buffer);\n",
580
+ " downloaded += buffer.byteLength;\n",
581
+ " progress.value = downloaded;\n",
582
+ " }\n",
583
+ " }\n",
584
+ " }\n",
585
+ " const blob = new Blob(buffers, {type: 'application/binary'});\n",
586
+ " const a = document.createElement('a');\n",
587
+ " a.href = window.URL.createObjectURL(blob);\n",
588
+ " a.download = filename;\n",
589
+ " div.appendChild(a);\n",
590
+ " a.click();\n",
591
+ " div.remove();\n",
592
+ " }\n",
593
+ " "
594
+ ]
595
+ },
596
+ "metadata": {}
597
+ },
598
+ {
599
+ "output_type": "display_data",
600
+ "data": {
601
+ "text/plain": [
602
+ "<IPython.core.display.Javascript object>"
603
+ ],
604
+ "application/javascript": [
605
+ "download(\"download_8ec6f24c-9653-4335-8c1b-766926434399\", \"wiki_ABC_Q2examples.json\", 5931)"
606
+ ]
607
+ },
608
+ "metadata": {}
609
+ }
610
+ ]
611
+ },
612
+ {
613
+ "cell_type": "markdown",
614
+ "source": [
615
+ "## Grading based on A, B, and C-level answers from previous students to Question 5 from the [Wiki](https://github.com/vanderbilt-data-science/lo-achievement/wiki/Examples-of-great,-good,-and-poor-answers-to-questions):\n",
616
+ "\n",
617
+ "**Question 5:** Describe a new example where you think machine learning could work. This cannot be examples we’ve given in class like customer churn, fraud detection, image or object recognition, disease diagnosis, stock prediction, self-driving cars, etc. t does not need to be completely novel, e.g., you can look for news stories and find something new. I want something that is different from the commonly used examples. You may use an example that you or another student brought up in class if it meets these criteria. Be sure to explain why you think it is an example where ML could work. Note: You do not need to be proven right that ML works. I am mostly interested in your logic and arguments of why you think it is a good idea, drawing on what we’ve discussed in class."
618
+ ],
619
+ "metadata": {
620
+ "id": "RfofiUreov7C"
621
+ }
622
+ },
623
+ {
624
+ "cell_type": "markdown",
625
+ "source": [
626
+ "### Creating .json file from case examples (Question 5)\n",
627
+ "The purpose of this cell is to create a json file based on the previously submitted, graded work of students based on the case file provided by Dr. Blocher in the Wiki."
628
+ ],
629
+ "metadata": {
630
+ "id": "P2h8XtXQo6eO"
631
+ }
632
+ },
633
+ {
634
+ "cell_type": "code",
635
+ "source": [
636
+ "# Context: question\n",
637
+ "\n",
638
+ "q5 = 'Question 5: Describe a new example where you think machine learning could work. This cannot be examples we’ve given in class like customer churn, fraud detection, image or object recognition, disease diagnosis, stock prediction, self-driving cars, etc. t does not need to be completely novel, e.g., you can look for news stories and find something new. I want something that is different from the commonly used examples. You may use an example that you or another student brought up in class if it meets these criteria. Be sure to explain why you think it is an example where ML could work. Note: You do not need to be proven right that ML works. I am mostly interested in your logic and arguments of why you think it is a good idea, drawing on what we’ve discussed in class.'"
639
+ ],
640
+ "metadata": {
641
+ "id": "evpoXMUSpCi4"
642
+ },
643
+ "execution_count": null,
644
+ "outputs": []
645
+ },
646
+ {
647
+ "cell_type": "code",
648
+ "source": [
649
+ "# A-level answers\n",
650
+ "\n",
651
+ "q5_A_answer_1 = 'As I recall, our group discussed that using ML to do intrusion detection for computers. Nowadays, people’s information is digitalized and stored in databases. Since databases are also servers (i.e., computers), there must be some vulnerability. Software is more common to do detection for now, but it can just recognize the intrusion pattern that already known or preset by the programmer. However, this can be accomplished by employing algorithms that can analyze large amounts of data and detect anomalies that may indicate an attempted intrusion. We can feed ML model with network traffic, labeled or not labeled, system logs to let it learn and identify unusual patterns or behavior. With well-learned models, we can use them to find out signs of unauthorized access or even malware. One of the greatest things is that ML model may be able to find some hidden pattern that human has not and predict how and when the intrusion will take place, or even figure out how the malware will function to attach. This trait is one of the most important traits that distinguish ML model from regular software.'\n",
652
+ "q5_A_answer_2 = 'Machine learning can be used in NLP (natural language processing). It is very useful to deal with massive unstructured text data. For example, we want to build a machine learning model to reflect the personality of Einstein according to the letters he wrote in the past decades to his family and friends. We can use machine learning models for NLP to transfer the unstructured text data to usable data first. Then we will use these data to train the model. After training the model by the letters Einstein wrote in the past decades, the model will have the same or similar personality as Einstein. And now we input the test letters into the model again, we expect the model will give us the sentiment scores of each letter (for example: humor-0.732, joyful-0.193, fear-0.015, sad-0.002). After we got the scores for each letter, we use statistical methods to analyze the scores and we will finally get an aggregate personality of Einstein.'\n",
653
+ "q5_A_answer_3 = 'The sports world is a place where I generally think machine learning can be well-utilized, but not in all cases. Something I’m thinking about is fantasy football, in which football fans can draft a team and decide who to “start” on their team for a given week and who to “sit.” NFL pundits and commentators can absolutely give ideas on who would be optimal, and do a great job at it, but when it comes to actual predictions for how many points a particular player might score in a given week, based on opponent average stats, expected workload, and other factors, while also measuring that against every other potential player, having an algorithm to back this up is extremely helpful. Sports betting can also benefit from these types of algorithms, and being able to accurately predict something as volatile as sports could have major business (and fandom) implications.'\n",
654
+ "\n",
655
+ "# List creation\n",
656
+ "\n",
657
+ "q5_A_answers_list = [q5_A_answer_1, q5_A_answer_2, q5_A_answer_3]\n"
658
+ ],
659
+ "metadata": {
660
+ "id": "B9abYhEupKA_"
661
+ },
662
+ "execution_count": null,
663
+ "outputs": []
664
+ },
665
+ {
666
+ "cell_type": "code",
667
+ "source": [
668
+ "# B-level answers\n",
669
+ "\n",
670
+ "q5_B_answer_1 = 'Machine learning can work well when it comes to finance organization, planning, and tracking. Since machine learning works well with statistical inference, it’s useful in decision making. For things such as planning and organizing, one example that machine learning can work well in is identifying price values for toiletries, groceries, and necessity items across various store within a specific location and output the best location for the user. We know that certain retailers mark up prices for the items on their shelfs. Some items are more expensive in one location than others and often time, certain prices are not posted online. In addition to that, traveling to different stores and compare prices are very inefficient because it can waste gas and time. To solve the problem, the algorithm will examine the user’s location and the item(s) that the user wants and output the best stores for the items. How this item work is that they take in data from all the closest stores, rank each item based on value, distance, and availability. On top of that, if users compile a lists of necessity items and output them in the model, the model can use data and compare stores that have more than one item on the list plus the other features (value, distance, and availability), and recommend the place, or places if item are not available, that could cut down time and expense for the users.'\n",
671
+ "q5_B_answer_2 = 'Natural gas consumption has rebounded in the last year, with economic recovery and increased extreme weather leading to increased demand for natural gas, resulting in a tight market and soaring prices. Gas supply agencies want to use ML to predict the deliverability of natural gas storage in depleted reservoirs. First, natural gas is most commonly stored in underground formations: 1) depleted reservoirs, 2) aquifers, and 3) salt caverns. A depleted reservoir must have some elements of a good oil and gas formation to be converted to a subsurface natural gas reservoir, such as good porosity and permeability, the presence of good seal rock, and the presence of cap rock. I think this thing can be achieved with ML because there is a huge amount of data collected from natural gas extraction sites around the world since 1915, and this data provides a good basis and realizability for our research. We can test and build models from this data so that we can use ML to make predictions.'\n",
672
+ "\n",
673
+ "# List creation\n",
674
+ "\n",
675
+ "q5_B_answers_list = [q5_B_answer_1, q5_B_answer_2]"
676
+ ],
677
+ "metadata": {
678
+ "id": "r0_Tt0lspkQ-"
679
+ },
680
+ "execution_count": null,
681
+ "outputs": []
682
+ },
683
+ {
684
+ "cell_type": "code",
685
+ "source": [
686
+ "# C-level answers\n",
687
+ "\n",
688
+ "q5_C_answer_1 = 'One example where ML would work in is speech recognition, converting speech into text.'\n",
689
+ "q5_C_answer_2 = 'ML may work well with education industry. Teachers can utilize ML for lesson preparation, such as analyze academic data, recommendation of high-quality teaching resources, on-demand generation of teaching plans. Others might be like intelligent teaching assistants instead of an actual person TA, or dynamically adjust the teaching content based on comprehensive factors such as learning situation analysis and learner style, and student feedback.'\n",
690
+ "\n",
691
+ "# List creation\n",
692
+ "\n",
693
+ "q5_C_answers_list = [q5_C_answer_1, q5_C_answer_2]"
694
+ ],
695
+ "metadata": {
696
+ "id": "oRlId1mtp46L"
697
+ },
698
+ "execution_count": null,
699
+ "outputs": []
700
+ },
701
+ {
702
+ "cell_type": "code",
703
+ "source": [
704
+ "q5_Q_and_A = [\"Question:\", q5, \"A-level Answers\", q5_A_answers_list, \"B-level Answers\", q5_B_answers_list, \"C-level Answers\", q5_C_answers_list]"
705
+ ],
706
+ "metadata": {
707
+ "id": "XJC2UNxSqHBp"
708
+ },
709
+ "execution_count": null,
710
+ "outputs": []
711
+ },
712
+ {
713
+ "cell_type": "code",
714
+ "source": [
715
+ "import json\n",
716
+ "from google.colab import files\n",
717
+ "\n",
718
+ "def save_example_answers(examples, filename='wiki_ABC_Q5examples.json'):\n",
719
+ " with open(filename, 'w') as file:\n",
720
+ " json.dump(examples, file)\n",
721
+ " files.download(filename)\n",
722
+ "\n",
723
+ "save_example_answers(q5_Q_and_A)"
724
+ ],
725
+ "metadata": {
726
+ "colab": {
727
+ "base_uri": "https://localhost:8080/",
728
+ "height": 17
729
+ },
730
+ "outputId": "3e565b3d-804c-4b5e-acc8-efeb955c6c14",
731
+ "id": "MRcW9_1MqPG-"
732
+ },
733
+ "execution_count": null,
734
+ "outputs": [
735
+ {
736
+ "output_type": "display_data",
737
+ "data": {
738
+ "text/plain": [
739
+ "<IPython.core.display.Javascript object>"
740
+ ],
741
+ "application/javascript": [
742
+ "\n",
743
+ " async function download(id, filename, size) {\n",
744
+ " if (!google.colab.kernel.accessAllowed) {\n",
745
+ " return;\n",
746
+ " }\n",
747
+ " const div = document.createElement('div');\n",
748
+ " const label = document.createElement('label');\n",
749
+ " label.textContent = `Downloading \"${filename}\": `;\n",
750
+ " div.appendChild(label);\n",
751
+ " const progress = document.createElement('progress');\n",
752
+ " progress.max = size;\n",
753
+ " div.appendChild(progress);\n",
754
+ " document.body.appendChild(div);\n",
755
+ "\n",
756
+ " const buffers = [];\n",
757
+ " let downloaded = 0;\n",
758
+ "\n",
759
+ " const channel = await google.colab.kernel.comms.open(id);\n",
760
+ " // Send a message to notify the kernel that we're ready.\n",
761
+ " channel.send({})\n",
762
+ "\n",
763
+ " for await (const message of channel.messages) {\n",
764
+ " // Send a message to notify the kernel that we're ready.\n",
765
+ " channel.send({})\n",
766
+ " if (message.buffers) {\n",
767
+ " for (const buffer of message.buffers) {\n",
768
+ " buffers.push(buffer);\n",
769
+ " downloaded += buffer.byteLength;\n",
770
+ " progress.value = downloaded;\n",
771
+ " }\n",
772
+ " }\n",
773
+ " }\n",
774
+ " const blob = new Blob(buffers, {type: 'application/binary'});\n",
775
+ " const a = document.createElement('a');\n",
776
+ " a.href = window.URL.createObjectURL(blob);\n",
777
+ " a.download = filename;\n",
778
+ " div.appendChild(a);\n",
779
+ " a.click();\n",
780
+ " div.remove();\n",
781
+ " }\n",
782
+ " "
783
+ ]
784
+ },
785
+ "metadata": {}
786
+ },
787
+ {
788
+ "output_type": "display_data",
789
+ "data": {
790
+ "text/plain": [
791
+ "<IPython.core.display.Javascript object>"
792
+ ],
793
+ "application/javascript": [
794
+ "download(\"download_8ec6f24c-9653-4335-8c1b-766926434399\", \"wiki_ABC_Q2examples.json\", 5931)"
795
+ ]
796
+ },
797
+ "metadata": {}
798
+ }
799
+ ]
800
+ },
801
+ {
802
+ "cell_type": "markdown",
803
+ "source": [
804
+ " # Exploring with Langchain Example Sets, FewShotPrompts and Example Selectors\n",
805
+ " Here we're exploring another avenue for this goal to see if we can yield better results than the vector stores. Again, we are using question 2 from the Wiki to test."
806
+ ],
807
+ "metadata": {
808
+ "id": "PpKRr5Vw4_0F"
809
+ }
810
+ },
811
+ {
812
+ "cell_type": "code",
813
+ "source": [
814
+ "# Context: question\n",
815
+ "\n",
816
+ "q2 = 'Question 2: Why is machine learning so important for businesses? Answer this question generally (i.e. such that it applies to many or at least most businesses).'"
817
+ ],
818
+ "metadata": {
819
+ "id": "-kmMFUaLs_Q1"
820
+ },
821
+ "execution_count": 7,
822
+ "outputs": []
823
+ },
824
+ {
825
+ "cell_type": "code",
826
+ "source": [
827
+ "from langchain.prompts.few_shot import FewShotPromptTemplate\n",
828
+ "from langchain.prompts.prompt import PromptTemplate\n",
829
+ "\n",
830
+ "examples = [\n",
831
+ " {\n",
832
+ " \"question\": f\"\"\" Please grade the following student's answer to the question ({q2})\n",
833
+ " on an A-, B-, C-level grading scale: {q2_A_answer_1}.\"\"\",\n",
834
+ " \"answer\": \"This student should recieve an A-level grade.\"\n",
835
+ " },\n",
836
+ " {\n",
837
+ " \"question\": f\"\"\" Please grade the following student's answer to the question ({q2})\n",
838
+ " on an A-, B-, C-level grading scale: {q2_A_answer_2}.\"\"\",\n",
839
+ " \"answer\": \"The student should recieve an A-level grade.\"\n",
840
+ " },\n",
841
+ " {\n",
842
+ " \"question\": f\"\"\" Please grade the following student's answer to the question ({q2})\n",
843
+ " on an A-, B-, C-level grading scale: {q2_B_answer_1}.\"\"\",\n",
844
+ " \"answer\": \"This student should recieve a B-level grade.\"\n",
845
+ " },\n",
846
+ " {\n",
847
+ " \"question\": f\"\"\" Please grade the following student's answer to the question ({q2})\n",
848
+ " on an A-, B-, C-level grading scale: {q2_B_answer_2}.\"\"\",\n",
849
+ " \"answer\": \"This student should recieve a B-level grade.\"\n",
850
+ " },\n",
851
+ " {\n",
852
+ " \"question\": f\"\"\" Please grade the following student's answer to the question ({q2})\n",
853
+ " on an A-, B-, C-level grading scale: {q2_B_answer_3}.\"\"\",\n",
854
+ " \"answer\": \"This student should recieve a B-level grade.\"\n",
855
+ " },\n",
856
+ " {\n",
857
+ " \"question\": f\"\"\" Please grade the following student's answer to the question ({q2})\n",
858
+ " on an A-, B-, C-level grading scale: {q2_C_answer_1}.\"\"\",\n",
859
+ " \"answer\": \"This student should recieve a C-level grade.\"\n",
860
+ " },\n",
861
+ " {\n",
862
+ " \"question\": f\"\"\" Please grade the following student's answer to the question ({q2})\n",
863
+ " on an A-, B-, C-level grading scale: {q2_C_answer_2}.\"\"\",\n",
864
+ " \"answer\": \"This student should recieve a C-level grade.\"\n",
865
+ " }\n",
866
+ "]"
867
+ ],
868
+ "metadata": {
869
+ "id": "1idnQLYW-697"
870
+ },
871
+ "execution_count": 12,
872
+ "outputs": []
873
+ },
874
+ {
875
+ "cell_type": "code",
876
+ "source": [
877
+ "# Example selector\n",
878
+ "from langchain.prompts.example_selector import SemanticSimilarityExampleSelector, MaxMarginalRelevanceExampleSelector, NGramOverlapExampleSelector\n",
879
+ "from langchain.vectorstores import Chroma\n",
880
+ "from langchain.embeddings import OpenAIEmbeddings\n",
881
+ "\n",
882
+ "\n",
883
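+ "# The semantic and MMR selectors embed the examples with OpenAIEmbeddings (an OpenAI API key is required) and pick the k most relevant at prompt time; the n-gram selector ranks by word overlap instead.\n",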
+ "example_selector_semantic = SemanticSimilarityExampleSelector.from_examples(\n",
884
+ " # This is the list of examples available to select from.\n",
885
+ " examples,\n",
886
+ " # This is the embedding class used to produce embeddings which are used to measure semantic similarity.\n",
887
+ " OpenAIEmbeddings(),\n",
888
+ " # This is the VectorStore class that is used to store the embeddings and do a similarity search over.\n",
889
+ " Chroma,\n",
890
+ " # This is the number of examples to produce.\n",
891
+ " k=1\n",
892
+ ")\n",
893
+ "\n",
894
+ "example_selector_mmr = MaxMarginalRelevanceExampleSelector.from_examples(\n",
895
+ " # This is the list of examples available to select from.\n",
896
+ " examples,\n",
897
+ " # This is the embedding class used to produce embeddings which are used to measure semantic similarity.\n",
898
+ " OpenAIEmbeddings(),\n",
899
+ " # This is the VectorStore class that is used to store the embeddings and do a similarity search over.\n",
900
+ " Chroma,\n",
901
+ " # This is the number of examples to produce.\n",
902
+ " k=1,\n",
903
+ ")\n",
904
+ "\n",
905
+ "example_selector_ngram = NGramOverlapExampleSelector(\n",
906
+ " # These are the examples it has available to choose from.\n",
907
+ " examples=examples,\n",
908
+ " # This is the PromptTemplate being used to format the examples.\n",
909
+ " example_prompt=example_prompt,\n",
910
+ " # This is the threshold, at which selector stops.\n",
911
+ " # It is set to -1.0 by default.\n",
912
+ " threshold=-1.0,\n",
913
+ " # For negative threshold:\n",
914
+ " # Selector sorts examples by ngram overlap score, and excludes none.\n",
915
+ " # For threshold greater than 1.0:\n",
916
+ " # Selector excludes all examples, and returns an empty list.\n",
917
+ " # For threshold equal to 0.0:\n",
918
+ " # Selector sorts examples by ngram overlap score,\n",
919
+ " # and excludes those with no ngram overlap with input.\n",
920
+ ")\n",
921
+ "\n"
922
+ ],
923
+ "metadata": {
924
+ "id": "Y7vNiHKhuIjm"
925
+ },
926
+ "execution_count": 15,
927
+ "outputs": []
928
+ },
929
+ {
930
+ "cell_type": "code",
931
+ "source": [
932
+ "example_prompt = PromptTemplate(input_variables=[\"question\", \"answer\"], template=\"Question: {question}\\n{answer}\")\n",
933
+ "\n",
934
+ "print(example_prompt.format(**examples[0]))"
935
+ ],
936
+ "metadata": {
937
+ "id": "x2dCmQdACwGi"
938
+ },
939
+ "execution_count": null,
940
+ "outputs": []
941
+ },
942
+ {
943
+ "cell_type": "code",
944
+ "source": [
945
+ "prompt = FewShotPromptTemplate(\n",
946
+ " #examples=examples,\n",
947
+ " example_selector = example_selector_semantic,\n",
948
+ " example_prompt = example_prompt,\n",
949
+ " suffix=\"Question: {input}\",\n",
950
+ " input_variables=[\"input\"]\n",
951
+ ")\n",
952
+ "\n",
953
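+ "# example1 (and example2/example3 used below) are assumed to be the held-out student answers defined earlier in this notebook.\n",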
+ "print(prompt.format(input=f\"\"\" Please grade the following student's answer to the question ({q2})\n",
954
+ " on an A-, B-, C-level grading scale: {example1}.\"\"\"))"
955
+ ],
956
+ "metadata": {
957
+ "id": "Uc_BvY0rBZgx"
958
+ },
959
+ "execution_count": null,
960
+ "outputs": []
961
+ },
962
+ {
963
+ "cell_type": "code",
964
+ "source": [
965
+ "prompt_semantic = FewShotPromptTemplate(\n",
966
+ " #examples=examples,\n",
967
+ " example_selector = example_selector_semantic,\n",
968
+ " example_prompt = example_prompt,\n",
969
+ " suffix=\"Question: {input}\",\n",
970
+ " input_variables=[\"input\"]\n",
971
+ ")\n",
972
+ "\n",
973
+ "prompt_mmr = FewShotPromptTemplate(\n",
974
+ " #examples=examples,\n",
975
+ " example_selector = example_selector_mmr,\n",
976
+ " example_prompt = example_prompt,\n",
977
+ " suffix=\"Question: {input}\",\n",
978
+ " input_variables=[\"input\"]\n",
979
+ ")\n",
980
+ "\n",
981
+ "prompt_ngram = FewShotPromptTemplate(\n",
982
+ " #examples=examples,\n",
983
+ " example_selector = example_selector_ngram,\n",
984
+ " example_prompt = example_prompt,\n",
985
+ " suffix=\"Question: {input}\",\n",
986
+ " input_variables=[\"input\"]\n",
987
+ ")"
988
+ ],
989
+ "metadata": {
990
+ "id": "k1laCoAPxhcH"
991
+ },
992
+ "execution_count": 18,
993
+ "outputs": []
994
+ },
995
+ {
996
+ "cell_type": "code",
997
+ "source": [
998
+ "# First using the Example Set to test without an example selector, just the base OpenAI model.\n",
999
+ "from langchain.llms import OpenAI\n",
1000
+ "base_llm = OpenAI()\n",
1001
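+ "# OpenAI() reads the OPENAI_API_KEY environment variable; this assumes the key was already set earlier in the notebook.\n",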
+ "base_llm(prompt.format(input=f\"\"\" Please grade the following students' answers to the question ({q2})\n",
1002
+ " on an A-, B-, C-level grading scale: student 1: {example1}, student 2: {example2}, and student 3: {example3}. Grade based on the following advice from the professor:\n",
1003
+ " One aspect of it was being specific. The poor answers (C) have a lot of platitudes, the better answers give\n",
1004
+ " specific examples (B and A). Secondly, they should discuss automation and/or prediction specifically. Those are\n",
1005
+ " the things that ML does (A and B answers), it is not 'technology' broadly (what a C answer might say).\n",
1006
+ " The difference between A and B answers should be in their complexity, specificity, thoughfulness\n",
1007
+ " , and general insight on the subject. However, keep in mind that these are short answer questions.\n",
1008
+ " The number of examples or the legnth of the response is not as important as the content itself. After assigning your grade, provide a brief\n",
1009
+ " explanation on your reasoning for assigning the grade based on the professor's advice, and\n",
1010
+ " what differentiated the answers from the higher grade level (if applicable). There should be three grades assigned in your response, and assign based on the order the\n",
1011
+ " answers were passed in.\"\"\"))"
1012
+ ],
1013
+ "metadata": {
1014
+ "id": "rUdx2axQuec1"
1015
+ },
1016
+ "execution_count": null,
1017
+ "outputs": []
1018
+ },
1019
+ {
1020
+ "cell_type": "code",
1021
+ "source": [
1022
+ "# Example Selector 1:\n",
1023
+ "\n",
1024
+ "base_llm(prompt_mmr.format(input=f\"\"\" Please grade the following students' answers to the question ({q2})\n",
1025
+ " on an A-, B-, C-level grading scale: student 1: {example1}, student 2: {example2}, and student 3: {example3}. Grade based on the following advice from the professor:\n",
1026
+ " One aspect of it was being specific. The poor answers (C) have a lot of platitudes, the better answers give\n",
1027
+ " specific examples (B and A). Secondly, they should discuss automation and/or prediction specifically. Those are\n",
1028
+ " the things that ML does (A and B answers), it is not 'technology' broadly (what a C answer might say).\n",
1029
+ " The difference between A and B answers should be in their complexity, specificity, thoughfulness\n",
1030
+ " , and general insight on the subject. However, keep in mind that these are short answer questions.\n",
1031
+ " The number of examples or the legnth of the response is not as important as the content itself. After assigning your grade, provide a brief\n",
1032
+ " explanation on your reasoning for assigning the grade based on the professor's advice, and\n",
1033
+ " what differentiated the answers from the higher grade level (if applicable). There should be three grades assigned in your response, and assign based on the order the\n",
1034
+ " answers were passed in.\"\"\"))"
1035
+ ],
1036
+ "metadata": {
1037
+ "id": "fe24IOslyITA"
1038
+ },
1039
+ "execution_count": null,
1040
+ "outputs": []
1041
+ },
1042
+ {
1043
+ "cell_type": "code",
1044
+ "source": [
1045
+ "# Example selector 2:\n",
1046
+ "\n",
1047
+ "base_llm(prompt_ngram.format(input=f\"\"\" Please grade the following students' answers to the question ({q2})\n",
1048
+ " on an A-, B-, C-level grading scale: student 1: {example1}, student 2: {example2}, and student 3: {example3}. Grade based on the following advice from the professor:\n",
1049
+ " One aspect of it was being specific. The poor answers (C) have a lot of platitudes, the better answers give\n",
1050
+ " specific examples (B and A). Secondly, they should discuss automation and/or prediction specifically. Those are\n",
1051
+ " the things that ML does (A and B answers), it is not 'technology' broadly (what a C answer might say).\n",
1052
+ " The difference between A and B answers should be in their complexity, specificity, thoughfulness\n",
1053
+ " , and general insight on the subject. However, keep in mind that these are short answer questions.\n",
1054
+ " The number of examples or the legnth of the response is not as important as the content itself. After assigning your grade, provide a brief\n",
1055
+ " explanation on your reasoning for assigning the grade based on the professor's advice, and\n",
1056
+ " what differentiated the answers from the higher grade level (if applicable). There should be three grades assigned in your response, and assign based on the order the\n",
1057
+ " answers were passed in.\"\"\"))"
1058
+ ],
1059
+ "metadata": {
1060
+ "id": "dXwJa30wyL4S"
1061
+ },
1062
+ "execution_count": null,
1063
+ "outputs": []
1064
+ },
1065
+ {
1066
+ "cell_type": "markdown",
1067
+ "source": [
1068
+ "### Results from the ngram example selector with the following prompt (yielded the best results (relatively):\n",
1069
+ "\n",
1070
+ "\n",
1071
+ "\n",
1072
+ "```\n",
1073
+ "input=f\"\"\" Please grade the following student's answer to the question ({q2})\n",
1074
+ " on an A-, B-, C-level grading scale: {example3}. Grade based on the following advice from the professor:\n",
1075
+ " One aspect of it was being specific. The poor answers have a lot of platitudes, the better answers give\n",
1076
+ " specific examples. Secondly, they should discuss automation and/or prediction specifically. Those are\n",
1077
+ " the things that ML does, it is not 'technology' broadly. The best answers\n",
1078
+ " (A) should be more complex and specific in answering the question and meeting that criteria\n",
1079
+ " than the better answers (B), though length may not be a determinant. After assigning your grade, provide a brief\n",
1080
+ " explanation on your reasoning for assigning the grade based on the professor's advice.\"\"\"\n",
1081
+ "```\n",
1082
+ "\n",
1083
+ "\n",
1084
+ "\n",
1085
+ "1. Example 1 (A-level grade): \"This student should recieve a B-level grade. The student's answer includes specific examples of how machine learning can be used in business to increase efficiency and profit margins, as well as how machine learning can automate processes previously done by humans. The student also mentions the importance of prediction in machine learning, which is one of its core functions.\" (incorrect)\n",
1086
+ "2. Example 2 (B-level grade): \"This student should recieve a B-level grade. The student's answer provides specific examples of how ML can be used in business such as NLP and predictive analysis. The student also references automation and prediction which are key aspects of ML.\" (correct)\n",
1087
+ "3. Example 3 (C-level grade): \"This student should recieve a B-level grade. The student's answer provides a specific example about how machine learning can help businesses save time and money by providing more accurate predictions. It also mentions the three main innovations that machine learning brings to the table (prediction is cheap, more accurate, and automated).\" (incorrect)"
1088
+ ],
1089
+ "metadata": {
1090
+ "id": "bKqtzH8hcoHD"
1091
+ }
1092
+ },
1093
+ {
1094
+ "cell_type": "markdown",
1095
+ "source": [
1096
+ "# Exploring Chat Prompt Templates and System/Human Messages\n",
1097
+ "This section is unfinalized as we are not sure if it is neccessary for the goals of this project, and we ran into errors which we were unsuccessful in debugging. This approach may yield better results but would need to be further devloped and debugged."
1098
+ ],
1099
+ "metadata": {
1100
+ "id": "YGWq7cw697Wv"
1101
+ }
1102
+ },
1103
+ {
1104
+ "cell_type": "code",
1105
+ "source": [
1106
+ "from langchain.chat_models import ChatOpenAI\n",
1107
+ "from langchain import PromptTemplate, LLMChain\n",
1108
+ "from langchain.prompts.chat import (\n",
1109
+ " ChatPromptTemplate,\n",
1110
+ " SystemMessagePromptTemplate,\n",
1111
+ " AIMessagePromptTemplate,\n",
1112
+ " HumanMessagePromptTemplate,\n",
1113
+ ")\n",
1114
+ "from langchain.schema import AIMessage, HumanMessage, SystemMessage\n",
1115
+ "\n",
1116
+ "\n",
1117
+ "\n",
1118
+ "from langchain.prompts import (\n",
1119
+ " ChatPromptTemplate,\n",
1120
+ " PromptTemplate,\n",
1121
+ " SystemMessagePromptTemplate,\n",
1122
+ " AIMessagePromptTemplate,\n",
1123
+ " HumanMessagePromptTemplate,\n",
1124
+ ")\n",
1125
+ "from langchain.schema import (\n",
1126
+ " AIMessage,\n",
1127
+ " HumanMessage,\n",
1128
+ " SystemMessage\n",
1129
+ ")"
1130
+ ],
1131
+ "metadata": {
1132
+ "id": "NzK7Yo_OEqlG"
1133
+ },
1134
+ "execution_count": null,
1135
+ "outputs": []
1136
+ },
1137
+ {
1138
+ "cell_type": "code",
1139
+ "source": [
1140
+ "template=\"You are a helpful assistant that grades student responses ({answer}) to questions ({question}) about machine learning.\"\n",
1141
+ "system_message_prompt = SystemMessagePromptTemplate.from_template(template)\n",
1142
+ "human_template=\"{text}\"\n",
1143
+ "human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)"
1144
+ ],
1145
+ "metadata": {
1146
+ "id": "XEsYhpKMFcud"
1147
+ },
1148
+ "execution_count": null,
1149
+ "outputs": []
1150
+ },
1151
+ {
1152
+ "cell_type": "code",
1153
+ "source": [
1154
+ "fprompt=PromptTemplate(\n",
1155
+ " template=\"You are a helpful assistant that grades student responses ({answer}) to questions ({question}) about machine learning.\",\n",
1156
+ " input_variables=[\"question\", \"answer\"],\n",
1157
+ ")\n",
1158
+ "system_message_prompt = SystemMessagePromptTemplate(prompt=prompt)"
1159
+ ],
1160
+ "metadata": {
1161
+ "id": "En5SM-ZHCT8l"
1162
+ },
1163
+ "execution_count": null,
1164
+ "outputs": []
1165
+ },
1166
+ {
1167
+ "cell_type": "code",
1168
+ "source": [
1169
+ "chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])\n",
1170
+ "\n",
1171
+ "# get a chat completion from the formatted messages\n",
1172
+ "chat_prompt.format_prompt(input = [q2, example1], text= \"The student should recieve an A-level grade.\" ).to_messages()"
1173
+ ],
1174
+ "metadata": {
1175
+ "id": "W2zIxXxDFNIh"
1176
+ },
1177
+ "execution_count": null,
1178
+ "outputs": []
1179
+ },
1180
+ {
1181
+ "cell_type": "code",
1182
+ "source": [
1183
+ "example_human = SystemMessagePromptTemplate.from_template(\n",
1184
+ " \"Machine learning is awesome\", additional_kwargs={\"name\": \"example_user\"}\n",
1185
+ ")\n",
1186
+ "example_ai = SystemMessagePromptTemplate.from_template(\n",
1187
+ " \"This student should reieve a C-level grade.\", additional_kwargs={\"name\": \"example_assistant\"}\n",
1188
+ ")\n",
1189
+ "human_template = \"{text}\"\n",
1190
+ "human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)"
1191
+ ],
1192
+ "metadata": {
1193
+ "id": "b_r2Y7YXTa0R"
1194
+ },
1195
+ "execution_count": null,
1196
+ "outputs": []
1197
+ },
1198
+ {
1199
+ "cell_type": "code",
1200
+ "source": [
1201
+ "chat = ChatOpenAI(temperature=0)"
1202
+ ],
1203
+ "metadata": {
1204
+ "id": "R2XA1z27cl6V"
1205
+ },
1206
+ "execution_count": null,
1207
+ "outputs": []
1208
+ },
1209
+ {
1210
+ "cell_type": "code",
1211
+ "source": [
1212
+ "chat_prompt = ChatPromptTemplate.from_messages(\n",
1213
+ " [system_message_prompt, example_human, example_ai, human_message_prompt]\n",
1214
+ ")\n",
1215
+ "chain = LLMChain(llm=chat, prompt=chat_prompt)\n",
1216
+ "# get a chat completion from the formatted messages\n",
1217
+ "text_dict = {'text': 'Please grade the following students response to question 2 based on the A-, B-, C-level grading scale'}\n",
1218
+ "chain.run(text_dict)"
1219
+ ],
1220
+ "metadata": {
1221
+ "id": "3WNMa70jVuf5"
1222
+ },
1223
+ "execution_count": null,
1224
+ "outputs": []
1225
+ },
1226
+ {
1227
+ "cell_type": "code",
1228
+ "source": [
1229
+ "\n",
1230
+ "query = f\"\"\" Please grade the following students answer: {example1} to the question ({q2}).\n",
1231
+ "The uploaded json should serve as as examples of A, B, and C level answers. In the document, the\n",
1232
+ "original question is printed, as well as examples of previous student answers that have recieved\n",
1233
+ "A, B, and C grades (labeled accordingly)\"\"\"\n",
1234
+ "grades = qa.run(query)\n",
1235
+ "print(grades)"
1236
+ ],
1237
+ "metadata": {
1238
+ "id": "naKuTxKa2-U8"
1239
+ },
1240
+ "execution_count": null,
1241
+ "outputs": []
1242
+ },
1243
+ {
1244
+ "cell_type": "markdown",
1245
+ "source": [
1246
+ "## Conclusions based on Question 2\n",
1247
+ "\n",
1248
+ "Using different types of chains and example selectors made minor differences, but the results were still not entirely correct, and also inconsistent. Using a persona prompt as the system message was semi-useful, as was adding the grading criteria to the prompt. But it did not remedy our problem.\n",
1249
+ "\n",
1250
+ "Moving forward:\n",
1251
+ "\n",
1252
+ "1. Grading criteria helped, but for longer/more complex questions, more context may be needed (i.e., more specifics on what seperates an A from B, B from C, etc. The model did not perform as well in distinguishing between these levels/did not seem to understand the nuances of the grading criteria).\n",
1253
+ "2. The model would likely perform better with a better example set/more context to be trained on.\n",
1254
+ "\n"
1255
+ ],
1256
+ "metadata": {
1257
+ "id": "C7kDC2pe7bNd"
1258
+ }
1259
+ }
1260
+ ],
1261
+ "metadata": {
1262
+ "colab": {
1263
+ "provenance": [],
1264
+ "include_colab_link": true
1265
+ },
1266
+ "kernelspec": {
1267
+ "display_name": "Python 3",
1268
+ "name": "python3"
1269
+ },
1270
+ "language_info": {
1271
+ "name": "python",
1272
+ "version": "3.10.6"
1273
+ }
1274
+ },
1275
+ "nbformat": 4,
1276
+ "nbformat_minor": 0
1277
+ }
lo-achievement/instructor_intr_notebook_grading_training.ipynb ADDED
@@ -0,0 +1,737 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "id": "view-in-github",
7
+ "colab_type": "text"
8
+ },
9
+ "source": [
10
+ "<a href=\"https://colab.research.google.com/github/vanderbilt-data-science/lo-achievement/blob/adding_grading_levels_to_instructor_nb/instructor_intr_notebook_grading_training.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "markdown",
15
+ "metadata": {
16
+ "collapsed": true,
17
+ "id": "brzvVeAsYiG2"
18
+ },
19
+ "source": [
20
+ "<a href=\"https://colab.research.google.com/github/vanderbilt-data-science/lo-achievement/blob/main/instructor_intr_notebook.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "markdown",
25
+ "metadata": {
26
+ "id": "WMKrKfx8_3fc"
27
+ },
28
+ "source": [
29
+ "# Instructor Grading and Assessment\n",
30
+ "This notebook executes grading of student submissions based on the examples provided in the [Wiki](https://github.com/vanderbilt-data-science/lo-achievement/wiki/Examples-of-great,-good,-and-poor-answers-to-questions) from Dr. Jesse Blocher. In this iteration, we use the Unstructured File Loader, which cannot proccess .json files (the preferred format). We are working on finding a file loader that allows .json. In this version of the notebook, the model has only been trained on Question 2 from the notebook.\n",
31
+ "\n",
32
+ "To train the model, we used 2 out of the three student example from each grade brack and inputted into a .pdf with clearly defined levels. Then, we used the excluded answers to test the accuracy of the model's grading."
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "markdown",
37
+ "source": [
38
+ "## Grading based on A, B, and C-level answers from previous students to Question 2 from the [Wiki](https://github.com/vanderbilt-data-science/lo-achievement/wiki/Examples-of-great,-good,-and-poor-answers-to-questions):\n",
39
+ "\n",
40
+ "**Question 2:** Why is machine learning so important for businesses? Answer this question generally (i.e. such that it applies to many or at least most businesses)."
41
+ ],
42
+ "metadata": {
43
+ "id": "ZTkNQ-dL5iO5"
44
+ }
45
+ },
46
+ {
47
+ "cell_type": "markdown",
48
+ "source": [
49
+ "### Creating .json file from case examples (Question 2)\n",
50
+ "The purpose of this cell is to create a json file based on the previously submitted, graded work of students based on the case file provided by Dr. Blocher in the Wiki"
51
+ ],
52
+ "metadata": {
53
+ "id": "TYlGEusr64kA"
54
+ }
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "source": [
59
+ "# Context: question\n",
60
+ "\n",
61
+ "q2 = 'Question 2: Why is machine learning so important for businesses? Answer this question generally (i.e. such that it applies to many or at least most businesses).'"
62
+ ],
63
+ "metadata": {
64
+ "id": "-kmMFUaLs_Q1"
65
+ },
66
+ "execution_count": 1,
67
+ "outputs": []
68
+ },
69
+ {
70
+ "cell_type": "code",
71
+ "source": [
72
+ "# A-level answers\n",
73
+ "\n",
74
+ "A_answer_1 = 'Machine learning is extremely important tool for businesses. It can be used in a variety of ways, but most importantly, it can be used to identify patterns within their data that might not otherwise be identified by human beings. For example, it can understand customer behaviors, optimize logistics, and expand efficiencies throughout the business. Machine learning does not get tired, meaning it can work as long as you want it to. It can sift through massive amounts of data, that no human being can look through in an efficient manner. Machine learning can be used as a tool to identify anomalies when something needs to be checked to save or gain money. The predictions that companies gain from machine learning are cheap, accurate, and automate. These machine learning algorithms can be brought to larger scales to encompass the whole business and its operations. It is important to note, Machine learning is just predictions. Predictions to understand important patterns that could make or break a company since they understand the patterns of their business more. It is an amazing tool, but should be used wisely and carefully because if not, it can expensive, useless, and straight up wrong.'\n",
75
+ "A_answer_2 = 'Machine learning is important for most of the sectors in business. Overall, it gives the company of an overview about what would be the trend for their business industry, and analyze the customer behavior to help business segment their customers groups. Today, many companies have a vast amount of information generated by behavior, computer, events, people, and devices. This massive amount of data is difficult for human to handle, and even if human manages it, it is not profitable as human labor is expensive. Thanks to machine learning, companies can utilize their in-house or even third-party data to make something useful for their business. In medical analysis, for example, with human, it takes a very long time to find patterns in thousands of MRI scans. On the other hand, machines can detect patterns in seconds by entering data as long as the information is correctly labeled or trained properly. Another example would be segmenting customer group. In marketing department, the business could use unsupervised machine learning to cluster their customer segments to generate personalized contents that are relevant for each of individuals.'\n",
76
+ "\n",
77
+ "# List creation\n",
78
+ "\n",
79
+ "A_answers_list = [A_answer_1, A_answer_2]\n"
80
+ ],
81
+ "metadata": {
82
+ "id": "yQT6aExSr1dP"
83
+ },
84
+ "execution_count": 2,
85
+ "outputs": []
86
+ },
87
+ {
88
+ "cell_type": "code",
89
+ "source": [
90
+ "# B-level answers\n",
91
+ "\n",
92
+ "B_answer_1 = 'Companies use ML models to improve different aspects of their business, like manufacturing, hiring, deployment, advertising, etc. The main goal is to improve productive and increase profitability of the company. The ML models are fed with company and externally available data to help the company optimize its departments and in turn become more financially successful/ productive. For example, using purchasing history, the company can predict who to advertise products to, to increase sales.'\n",
93
+ "B_answer_2 = 'Machine learning allows business to have automated decision, scale, predictive analysis and performance. Machine learning also helps a business have a data strategy. This is how a firm uses data, data infrastructure, governance, etc. to accomplish its strategic goals and maintain/grow their competitive advantage within their industry.'\n",
94
+ "B_answer_3 = 'The short answer is ML can help make decisions for businesses. To be clarified, ML does not make decisions for businesses. I mean it can, but people have not trusted ML enough yet and ML has not been that good to let it directly make business decisions. Business people only use ML to help themselves get intuitions of how decisions should be made and make predictions of results they might get based on their decisions. For example, if a business tries to launch a new product, it will use ML to test whether it will work or not on a small scale before it is introduced to a large scale. People called this step piloting. In this step, people collect data that is generated by using the pilot product and analyze their next move. They could tweak some features based on the feedback. If they think the data in their interest shows the product performs well, they might predict that this product will be successful when it is introduced on a large scale. Then, they will launch it.'\n",
95
+ "# List creation\n",
96
+ "\n",
97
+ "B_answers_list = [B_answer_1, B_answer_2, B_answer_3]"
98
+ ],
99
+ "metadata": {
100
+ "id": "KB1CmeRwtRvf"
101
+ },
102
+ "execution_count": 3,
103
+ "outputs": []
104
+ },
105
+ {
106
+ "cell_type": "code",
107
+ "source": [
108
+ "# C-level answers\n",
109
+ "\n",
110
+ "C_answer_1 = 'Machine learning powers many of the services we use today, such as the recommendation systems of Spotify and Netflix; search engines such as Google and Bing; social media such as TikTok and Instagram; voices such as Siri and Alexa, the list can go on. All these examples show that machine learning is already starting to play a pivotal role in today\"s data-rich world. Machines can help us sift through useful information that can lead to big breakthroughs, and we have seen the widespread use of this technology in various industries such as finance, healthcare, insurance, manufacturing, transformational change, etc.'\n",
111
+ "C_answer_3 = 'As technology advanced, there are tons of new data generated and stored. All industries experienced this surge in data, including business. There is a huge amount of business data stored and waited in the database of each firm and they need solutions to utilize these data. Machine learning is a very promising approach for firms to puts these data in and output a meaning pattern or result that could help the firms with their existing work. This could turn into a working product or provide insights that could enhance the efficiency of the company’s workflow. With machine learning, a firm could either enter a new market with the new product or save time and effort with the meaningful insights. Achieving these possibilities with the data they already owned is a low effort but high reward action. This is the reason machine learning is valued by many businesses recently.'\n",
112
+ "\n",
113
+ "# List creation\n",
114
+ "\n",
115
+ "C_answers_list = [C_answer_1, C_answer_3]"
116
+ ],
117
+ "metadata": {
118
+ "id": "3diAz43othjc"
119
+ },
120
+ "execution_count": 4,
121
+ "outputs": []
122
+ },
123
+ {
124
+ "cell_type": "code",
125
+ "source": [
126
+ "Q_and_A = [\"Question:\", q2, \"A-level Answers\", A_answers_list, \"B-level Answers\", B_answers_list, \"C-level Answers\", C_answers_list]"
127
+ ],
128
+ "metadata": {
129
+ "id": "oAnrMSU6u9do"
130
+ },
131
+ "execution_count": 5,
132
+ "outputs": []
133
+ },
134
+ {
135
+ "cell_type": "code",
136
+ "source": [
137
+ "import json\n",
138
+ "from google.colab import files\n",
139
+ "\n",
140
+ "def save_example_answers(examples, filename='wiki_ABC_Q2examples.json'):\n",
141
+ " with open(filename, 'w') as file:\n",
142
+ " json.dump(examples, file)\n",
143
+ " files.download(filename)\n",
144
+ "\n",
145
+ "save_example_answers(Q_and_A)"
146
+ ],
147
+ "metadata": {
148
+ "colab": {
149
+ "base_uri": "https://localhost:8080/",
150
+ "height": 17
151
+ },
152
+ "id": "B16iYMEnri9s",
153
+ "outputId": "3e565b3d-804c-4b5e-acc8-efeb955c6c14"
154
+ },
155
+ "execution_count": 6,
156
+ "outputs": [
157
+ {
158
+ "output_type": "display_data",
159
+ "data": {
160
+ "text/plain": [
161
+ "<IPython.core.display.Javascript object>"
162
+ ],
163
+ "application/javascript": [
164
+ "\n",
165
+ " async function download(id, filename, size) {\n",
166
+ " if (!google.colab.kernel.accessAllowed) {\n",
167
+ " return;\n",
168
+ " }\n",
169
+ " const div = document.createElement('div');\n",
170
+ " const label = document.createElement('label');\n",
171
+ " label.textContent = `Downloading \"${filename}\": `;\n",
172
+ " div.appendChild(label);\n",
173
+ " const progress = document.createElement('progress');\n",
174
+ " progress.max = size;\n",
175
+ " div.appendChild(progress);\n",
176
+ " document.body.appendChild(div);\n",
177
+ "\n",
178
+ " const buffers = [];\n",
179
+ " let downloaded = 0;\n",
180
+ "\n",
181
+ " const channel = await google.colab.kernel.comms.open(id);\n",
182
+ " // Send a message to notify the kernel that we're ready.\n",
183
+ " channel.send({})\n",
184
+ "\n",
185
+ " for await (const message of channel.messages) {\n",
186
+ " // Send a message to notify the kernel that we're ready.\n",
187
+ " channel.send({})\n",
188
+ " if (message.buffers) {\n",
189
+ " for (const buffer of message.buffers) {\n",
190
+ " buffers.push(buffer);\n",
191
+ " downloaded += buffer.byteLength;\n",
192
+ " progress.value = downloaded;\n",
193
+ " }\n",
194
+ " }\n",
195
+ " }\n",
196
+ " const blob = new Blob(buffers, {type: 'application/binary'});\n",
197
+ " const a = document.createElement('a');\n",
198
+ " a.href = window.URL.createObjectURL(blob);\n",
199
+ " a.download = filename;\n",
200
+ " div.appendChild(a);\n",
201
+ " a.click();\n",
202
+ " div.remove();\n",
203
+ " }\n",
204
+ " "
205
+ ]
206
+ },
207
+ "metadata": {}
208
+ },
209
+ {
210
+ "output_type": "display_data",
211
+ "data": {
212
+ "text/plain": [
213
+ "<IPython.core.display.Javascript object>"
214
+ ],
215
+ "application/javascript": [
216
+ "download(\"download_8ec6f24c-9653-4335-8c1b-766926434399\", \"wiki_ABC_Q2examples.json\", 5931)"
217
+ ]
218
+ },
219
+ "metadata": {}
220
+ }
221
+ ]
222
+ },
223
+ {
224
+ "cell_type": "markdown",
225
+ "source": [
226
+ "## **Start here** to interact with model"
227
+ ],
228
+ "metadata": {
229
+ "id": "PpKRr5Vw4_0F"
230
+ }
231
+ },
232
+ {
233
+ "cell_type": "code",
234
+ "source": [
235
+ "! pip install -q langchain=='0.0.229' openai gradio numpy chromadb tiktoken unstructured pdf2image pydantic==\"1.10.8\" jq"
236
+ ],
237
+ "metadata": {
238
+ "id": "UJi1Oy0CyPHD"
239
+ },
240
+ "execution_count": null,
241
+ "outputs": []
242
+ },
243
+ {
244
+ "cell_type": "code",
245
+ "source": [
246
+ "# import necessary libraries here\n",
247
+ "from getpass import getpass\n",
248
+ "from langchain.llms import OpenAI as openai\n",
249
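+ "# Note: importing the LangChain OpenAI LLM as lowercase 'openai' shadows the openai package name if that package is imported later.\n",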
+ "from langchain.chat_models import ChatOpenAI\n",
250
+ "from langchain.prompts import PromptTemplate\n",
251
+ "from langchain.document_loaders import TextLoader\n",
252
+ "from langchain.indexes import VectorstoreIndexCreator\n",
253
+ "from langchain.text_splitter import CharacterTextSplitter\n",
254
+ "from langchain.embeddings import OpenAIEmbeddings\n",
255
+ "from langchain.schema import SystemMessage, HumanMessage, AIMessage\n",
256
+ "import numpy as np\n",
257
+ "import os\n",
258
+ "from langchain.vectorstores import Chroma\n",
259
+ "from langchain.document_loaders.unstructured import UnstructuredFileLoader\n",
260
+ "from langchain.document_loaders import UnstructuredFileLoader\n",
261
+ "from langchain.chains import VectorDBQA\n",
262
+ "from langchain.document_loaders import JSONLoader\n",
263
+ "import json\n",
264
+ "from pathlib import Path\n",
265
+ "from pprint import pprint"
266
+ ],
267
+ "metadata": {
268
+ "id": "YHytCUoExrYe"
269
+ },
270
+ "execution_count": 18,
271
+ "outputs": []
272
+ },
273
+ {
274
+ "cell_type": "code",
275
+ "source": [
276
+ "from google.colab import files\n",
277
+ "uploaded = files.upload()"
278
+ ],
279
+ "metadata": {
280
+ "colab": {
281
+ "base_uri": "https://localhost:8080/",
282
+ "height": 74
283
+ },
284
+ "id": "Wpt6qsmEw8WP",
285
+ "outputId": "4563fa62-5245-4115-ecb9-326353dba29c"
286
+ },
287
+ "execution_count": 7,
288
+ "outputs": [
289
+ {
290
+ "output_type": "display_data",
291
+ "data": {
292
+ "text/plain": [
293
+ "<IPython.core.display.HTML object>"
294
+ ],
295
+ "text/html": [
296
+ "\n",
297
+ " <input type=\"file\" id=\"files-6f630631-16ee-49df-8de9-29bfa3ec5f7b\" name=\"files[]\" multiple disabled\n",
298
+ " style=\"border:none\" />\n",
299
+ " <output id=\"result-6f630631-16ee-49df-8de9-29bfa3ec5f7b\">\n",
300
+ " Upload widget is only available when the cell has been executed in the\n",
301
+ " current browser session. Please rerun this cell to enable.\n",
302
+ " </output>\n",
303
+ " <script>// Copyright 2017 Google LLC\n",
304
+ "//\n",
305
+ "// Licensed under the Apache License, Version 2.0 (the \"License\");\n",
306
+ "// you may not use this file except in compliance with the License.\n",
307
+ "// You may obtain a copy of the License at\n",
308
+ "//\n",
309
+ "// http://www.apache.org/licenses/LICENSE-2.0\n",
310
+ "//\n",
311
+ "// Unless required by applicable law or agreed to in writing, software\n",
312
+ "// distributed under the License is distributed on an \"AS IS\" BASIS,\n",
313
+ "// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
314
+ "// See the License for the specific language governing permissions and\n",
315
+ "// limitations under the License.\n",
316
+ "\n",
317
+ "/**\n",
318
+ " * @fileoverview Helpers for google.colab Python module.\n",
319
+ " */\n",
320
+ "(function(scope) {\n",
321
+ "function span(text, styleAttributes = {}) {\n",
322
+ " const element = document.createElement('span');\n",
323
+ " element.textContent = text;\n",
324
+ " for (const key of Object.keys(styleAttributes)) {\n",
325
+ " element.style[key] = styleAttributes[key];\n",
326
+ " }\n",
327
+ " return element;\n",
328
+ "}\n",
329
+ "\n",
330
+ "// Max number of bytes which will be uploaded at a time.\n",
331
+ "const MAX_PAYLOAD_SIZE = 100 * 1024;\n",
332
+ "\n",
333
+ "function _uploadFiles(inputId, outputId) {\n",
334
+ " const steps = uploadFilesStep(inputId, outputId);\n",
335
+ " const outputElement = document.getElementById(outputId);\n",
336
+ " // Cache steps on the outputElement to make it available for the next call\n",
337
+ " // to uploadFilesContinue from Python.\n",
338
+ " outputElement.steps = steps;\n",
339
+ "\n",
340
+ " return _uploadFilesContinue(outputId);\n",
341
+ "}\n",
342
+ "\n",
343
+ "// This is roughly an async generator (not supported in the browser yet),\n",
344
+ "// where there are multiple asynchronous steps and the Python side is going\n",
345
+ "// to poll for completion of each step.\n",
346
+ "// This uses a Promise to block the python side on completion of each step,\n",
347
+ "// then passes the result of the previous step as the input to the next step.\n",
348
+ "function _uploadFilesContinue(outputId) {\n",
349
+ " const outputElement = document.getElementById(outputId);\n",
350
+ " const steps = outputElement.steps;\n",
351
+ "\n",
352
+ " const next = steps.next(outputElement.lastPromiseValue);\n",
353
+ " return Promise.resolve(next.value.promise).then((value) => {\n",
354
+ " // Cache the last promise value to make it available to the next\n",
355
+ " // step of the generator.\n",
356
+ " outputElement.lastPromiseValue = value;\n",
357
+ " return next.value.response;\n",
358
+ " });\n",
359
+ "}\n",
360
+ "\n",
361
+ "/**\n",
362
+ " * Generator function which is called between each async step of the upload\n",
363
+ " * process.\n",
364
+ " * @param {string} inputId Element ID of the input file picker element.\n",
365
+ " * @param {string} outputId Element ID of the output display.\n",
366
+ " * @return {!Iterable<!Object>} Iterable of next steps.\n",
367
+ " */\n",
368
+ "function* uploadFilesStep(inputId, outputId) {\n",
369
+ " const inputElement = document.getElementById(inputId);\n",
370
+ " inputElement.disabled = false;\n",
371
+ "\n",
372
+ " const outputElement = document.getElementById(outputId);\n",
373
+ " outputElement.innerHTML = '';\n",
374
+ "\n",
375
+ " const pickedPromise = new Promise((resolve) => {\n",
376
+ " inputElement.addEventListener('change', (e) => {\n",
377
+ " resolve(e.target.files);\n",
378
+ " });\n",
379
+ " });\n",
380
+ "\n",
381
+ " const cancel = document.createElement('button');\n",
382
+ " inputElement.parentElement.appendChild(cancel);\n",
383
+ " cancel.textContent = 'Cancel upload';\n",
384
+ " const cancelPromise = new Promise((resolve) => {\n",
385
+ " cancel.onclick = () => {\n",
386
+ " resolve(null);\n",
387
+ " };\n",
388
+ " });\n",
389
+ "\n",
390
+ " // Wait for the user to pick the files.\n",
391
+ " const files = yield {\n",
392
+ " promise: Promise.race([pickedPromise, cancelPromise]),\n",
393
+ " response: {\n",
394
+ " action: 'starting',\n",
395
+ " }\n",
396
+ " };\n",
397
+ "\n",
398
+ " cancel.remove();\n",
399
+ "\n",
400
+ " // Disable the input element since further picks are not allowed.\n",
401
+ " inputElement.disabled = true;\n",
402
+ "\n",
403
+ " if (!files) {\n",
404
+ " return {\n",
405
+ " response: {\n",
406
+ " action: 'complete',\n",
407
+ " }\n",
408
+ " };\n",
409
+ " }\n",
410
+ "\n",
411
+ " for (const file of files) {\n",
412
+ " const li = document.createElement('li');\n",
413
+ " li.append(span(file.name, {fontWeight: 'bold'}));\n",
414
+ " li.append(span(\n",
415
+ " `(${file.type || 'n/a'}) - ${file.size} bytes, ` +\n",
416
+ " `last modified: ${\n",
417
+ " file.lastModifiedDate ? file.lastModifiedDate.toLocaleDateString() :\n",
418
+ " 'n/a'} - `));\n",
419
+ " const percent = span('0% done');\n",
420
+ " li.appendChild(percent);\n",
421
+ "\n",
422
+ " outputElement.appendChild(li);\n",
423
+ "\n",
424
+ " const fileDataPromise = new Promise((resolve) => {\n",
425
+ " const reader = new FileReader();\n",
426
+ " reader.onload = (e) => {\n",
427
+ " resolve(e.target.result);\n",
428
+ " };\n",
429
+ " reader.readAsArrayBuffer(file);\n",
430
+ " });\n",
431
+ " // Wait for the data to be ready.\n",
432
+ " let fileData = yield {\n",
433
+ " promise: fileDataPromise,\n",
434
+ " response: {\n",
435
+ " action: 'continue',\n",
436
+ " }\n",
437
+ " };\n",
438
+ "\n",
439
+ " // Use a chunked sending to avoid message size limits. See b/62115660.\n",
440
+ " let position = 0;\n",
441
+ " do {\n",
442
+ " const length = Math.min(fileData.byteLength - position, MAX_PAYLOAD_SIZE);\n",
443
+ " const chunk = new Uint8Array(fileData, position, length);\n",
444
+ " position += length;\n",
445
+ "\n",
446
+ " const base64 = btoa(String.fromCharCode.apply(null, chunk));\n",
447
+ " yield {\n",
448
+ " response: {\n",
449
+ " action: 'append',\n",
450
+ " file: file.name,\n",
451
+ " data: base64,\n",
452
+ " },\n",
453
+ " };\n",
454
+ "\n",
455
+ " let percentDone = fileData.byteLength === 0 ?\n",
456
+ " 100 :\n",
457
+ " Math.round((position / fileData.byteLength) * 100);\n",
458
+ " percent.textContent = `${percentDone}% done`;\n",
459
+ "\n",
460
+ " } while (position < fileData.byteLength);\n",
461
+ " }\n",
462
+ "\n",
463
+ " // All done.\n",
464
+ " yield {\n",
465
+ " response: {\n",
466
+ " action: 'complete',\n",
467
+ " }\n",
468
+ " };\n",
469
+ "}\n",
470
+ "\n",
471
+ "scope.google = scope.google || {};\n",
472
+ "scope.google.colab = scope.google.colab || {};\n",
473
+ "scope.google.colab._files = {\n",
474
+ " _uploadFiles,\n",
475
+ " _uploadFilesContinue,\n",
476
+ "};\n",
477
+ "})(self);\n",
478
+ "</script> "
479
+ ]
480
+ },
481
+ "metadata": {}
482
+ },
483
+ {
484
+ "output_type": "stream",
485
+ "name": "stdout",
486
+ "text": [
487
+ "Saving wiki_ABC_Q2examples (2).json to wiki_ABC_Q2examples (2).json\n"
488
+ ]
489
+ }
490
+ ]
491
+ },
492
+ {
493
+ "cell_type": "code",
494
+ "source": [
495
+ "# setup open AI api key\n",
496
+ "openai_api_key = getpass()"
497
+ ],
498
+ "metadata": {
499
+ "id": "jVPEFX3ixJnM"
500
+ },
501
+ "execution_count": null,
502
+ "outputs": []
503
+ },
504
+ {
505
+ "cell_type": "code",
506
+ "source": [
507
+ "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n",
508
+ "openai.api_key = openai_api_key"
509
+ ],
510
+ "metadata": {
511
+ "id": "MO7VuGmrxVAr"
512
+ },
513
+ "execution_count": 19,
514
+ "outputs": []
515
+ },
516
+ {
517
+ "cell_type": "code",
518
+ "source": [
519
+ "mdl_name = 'gpt-3.5-turbo-0301'"
520
+ ],
521
+ "metadata": {
522
+ "id": "4Thxj6Gk1zVS"
523
+ },
524
+ "execution_count": 20,
525
+ "outputs": []
526
+ },
527
+ {
528
+ "cell_type": "code",
529
+ "source": [
530
+ "llm = ChatOpenAI(model='gpt-3.5-turbo-16k')\n",
531
+ "messages = [\n",
532
+ " SystemMessage(content=\"You are a helpful assistant.\"),\n",
533
+ " HumanMessage(content=\"\")\n",
534
+ "]"
535
+ ],
536
+ "metadata": {
537
+ "id": "Sgq9aVqpxZnK"
538
+ },
539
+ "execution_count": 36,
540
+ "outputs": []
541
+ },
542
+ {
543
+ "cell_type": "code",
544
+ "source": [
545
+ "file_path='/content/wiki_ABC_Q2examples (2).json'"
546
+ ],
547
+ "metadata": {
548
+ "id": "Ak7X_ZRba48F"
549
+ },
550
+ "execution_count": null,
551
+ "outputs": []
552
+ },
553
+ {
554
+ "cell_type": "code",
555
+ "source": [
556
+ "data = json.loads(Path(file_path).read_text())\n",
557
+ "data = str(data)"
558
+ ],
559
+ "metadata": {
560
+ "id": "PKqQROVMc6BP"
561
+ },
562
+ "execution_count": 26,
563
+ "outputs": []
564
+ },
565
+ {
566
+ "cell_type": "code",
567
+ "source": [
568
+ "# Contruct Vector Store\n",
569
+ "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
570
+ "texts = text_splitter.split_text(data)\n",
571
+ "\n",
572
+ "embeddings = OpenAIEmbeddings()\n",
573
+ "\n",
574
+ "db = Chroma.from_texts(texts, embeddings)\n",
575
+ "\n",
576
+ "qa = VectorDBQA.from_chain_type(llm=ChatOpenAI(model_name = mdl_name), chain_type=\"stuff\", vectorstore=db, k=1)"
577
+ ],
578
+ "metadata": {
579
+ "id": "i2NadtLlcxur"
580
+ },
581
+ "execution_count": 37,
582
+ "outputs": []
583
+ },
584
+ {
585
+ "cell_type": "code",
586
+ "source": [
587
+ "# A level\n",
588
+ "example1 = 'The most powerful aspect of machine learning is its ability to automate processes. If the goal is well defined and repeatable, an algorithm can be trained to perform that task far faster than any human and often with more reliability. Because of this, businesses can implement machine learning algorithms to solve problems human labor was previously required for, whether that be mental or physical labor. Although implementing machine learning often has high initial costs, because computers do not require payment outside of maintaining their operation, in the long run companies can save money by either making their workers’ tasks more efficient or by entirely automating tasks. This increases the profit margins for those firms.'\n",
589
+ "# B level\n",
590
+ "example2 ='ML systems can help improve access to data while managing compliance. It helps businesspeople deal with data in a more efficient way, since there is numerous data generated per second in such industry. If they use some of the simple classification models with high accuracy, it will save them a lot of time on data cleaning and validation, which are kind of repetitive and time-consuming. For example, NLP provides an easy way for personnel to query business information, understand business processes, and discover new relationships between business data, ideas based on intuition and insight often emerge. Using models to do prediction helps people making wiser decision. Since models can handle way more data than human, newly collected data can feed in the model and get some predictive result as a reference to the decision makers. This is significant because in this industry, time is precious, and traders must decide quickly and precisely. A little negligence will lead to a big mistake, lose a lot of money, and even affect the company\"s reputation. Models can see patterns that are not easy for human to spot, which is also valuable for modify the way people doing analysis and interpret.'\n",
591
+ "# C level\n",
592
+ "example3 = 'The machine learning model (or one a broader view, artificial intelligence) is about prediction. According to the lecture, there are tree main innovations in it. Prediction is cheap, more accurate and automated. As a result, armed with machine learning, businesses could automatically gain much more accurate and powerful capabilities in forecasting, leading to a big savings both in time and money.'\n",
593
+ "\n",
594
+ "# Randomized list of answers\n",
595
+ "training_answers = [example2, example1, example3]"
596
+ ],
597
+ "metadata": {
598
+ "id": "CJxUs8lG12kd"
599
+ },
600
+ "execution_count": 38,
601
+ "outputs": []
602
+ },
603
+ {
604
+ "cell_type": "code",
605
+ "source": [
606
+ "query = f\"\"\" Please grade the following student answers: {training_answers} to the question ({q2}).\n",
607
+ "The uploaded pdf should serve as as examples of A, B, and C level answers. In the document, the\n",
608
+ "original question is printed, as well as examples of previous student answers that have recieved\n",
609
+ "A, B, and C grades (labeled accordingly)\"\"\"\n",
610
+ "\n",
611
+ "query_prefix = \"\"\" The uploaded pdf should serve as as examples of A, B, and C level answers.\n",
612
+ "In the document, the original question is printed, as well as examples of previous student answers that have recieved\n",
613
+ "A, B, and C grades (labeled accordingly).\"\"\"\n",
614
+ "answer = qa.run(query_prefix + query)\n",
615
+ "print(answer)"
616
+ ],
617
+ "metadata": {
618
+ "colab": {
619
+ "base_uri": "https://localhost:8080/"
620
+ },
621
+ "id": "naKuTxKa2-U8",
622
+ "outputId": "fc10a157-071d-4a09-bf4d-315fec1d53e9"
623
+ },
624
+ "execution_count": 56,
625
+ "outputs": [
626
+ {
627
+ "output_type": "stream",
628
+ "name": "stdout",
629
+ "text": [
630
+ "The first answer would be a B-level answer. It mentions the efficiency of using machine learning for data cleaning and validation, as well as the ability to handle large amounts of data and make predictions. However, it could be more specific in terms of how machine learning can benefit businesses in various industries.\n",
631
+ "\n",
632
+ "The second answer would be an A-level answer. It highlights the ability of machine learning to automate processes and save money in the long run, as well as mentioning the reliability and efficiency of algorithms compared to human labor.\n",
633
+ "\n",
634
+ "The third answer would also be an A-level answer. It accurately describes the main innovation of machine learning as being prediction, and how it can lead to cost savings in time and money for businesses. It also hints at the potential for machine learning to improve forecasting capabilities.\n"
635
+ ]
636
+ }
637
+ ]
638
+ },
639
+ {
640
+ "cell_type": "markdown",
641
+ "source": [
642
+ "## Conclusions based on Question 2\n",
643
+ "\n",
644
+ "Moving forward:\n",
645
+ "\n",
646
+ "\n",
647
+ "1. Train the model on the other questions provided in the case example\n",
648
+ "2. Request more student answers from Dr. Blocher\n",
649
+ "3. Request a grading rubric from Dr. Blocher for the case examples, which may help the model gain more context\n",
650
+ "\n"
651
+ ],
652
+ "metadata": {
653
+ "id": "C7kDC2pe7bNd"
654
+ }
655
+ },
656
+ {
657
+ "cell_type": "markdown",
658
+ "source": [
659
+ "### Ouput for query with .pdf\n",
660
+ "\n",
661
+ "We also experimenting with using a .pdf file (before we figured out how to parse .json files). Here is one set of results.\n",
662
+ "\n",
663
+ "The first answer can be graded as a B. It touches on the benefits of machine learning such as improving access to data, efficiency in data management, and faster decision-making. However, the answer could be strengthened by providing more specific examples of how machine learning has benefited businesses.\n",
664
+ "\n",
665
+ "The second answer can be graded as an A. It provides a clear and concise explanation of how machine learning can automate processes and save companies money in the long run. The answer also acknowledges the initial costs of implementing machine learning but emphasizes the potential for increased profit margins.\n",
666
+ "\n",
667
+ "The third answer can also be graded as an A. It highlights the main innovation of machine learning which is prediction and how it can lead to cost and time savings for businesses. The answer is well-organized and provides a clear explanation of why machine learning is important for businesses in general.\n",
668
+ "\n",
669
+ "**Results:**\n",
670
+ "\n",
671
+ "The model did not perform as well as expected. It did not successfully grade any of the questions. The first qyestion should have been graded as an A, second as a B, and third as a C."
672
+ ],
673
+ "metadata": {
674
+ "id": "iJTiGX4aZ91_"
675
+ }
676
+ },
677
+ {
678
+ "cell_type": "markdown",
679
+ "source": [
680
+ "### Output for .json queries\n",
681
+ "\n",
682
+ "**First query:**\n",
683
+ "\n",
684
+ "The first answer would be a B-level answer. It touches on the efficiency and time-saving benefits of machine learning, but could benefit from more specific examples and a deeper explanation of how it can improve business outcomes.\n",
685
+ "\n",
686
+ "The second answer would be an A-level answer. It provides a clear explanation of how machine learning can automate processes and improve efficiency, leading to cost savings and increased profits for businesses.\n",
687
+ "\n",
688
+ "The third answer would also be an A-level answer. It focuses on the predictive power of machine learning and how it can save time and money for businesses by providing more accurate forecasts. It also mentions the automation benefits of machine learning.\n",
689
+ "\n",
690
+ "Notes: Same results as first query using .pdf.\n",
691
+ "\n",
692
+ "**Second query:**\n",
693
+ "\n",
694
+ "Answer 1: B-level answer. The answer talks about how machine learning can automate processes and solve problems that were previously done by human labor. It also mentions how implementing machine learning can save money in the long run.\n",
695
+ "\n",
696
+ "Answer 2: A-level answer. The answer discusses how machine learning can help with data cleaning and validation, which can save time and enhance efficiency. It also mentions how models can handle more data than humans and can provide predictive results for decision-makers. Additionally, it talks about how machines can see patterns that are difficult for humans to spot, which can help with analysis and interpretation.\n",
697
+ "\n",
698
+ "Answer 3: C-level answer. The answer discusses how machine learning can provide accurate and powerful capabilities in forecasting, resulting in savings in time and money. It also talks about the three main innovations in machine learning, which are prediction, accuracy, and automation. The answer provides examples of different industries that have implemented machine learning, showcasing its importance in today's data-rich world.\n",
699
+ "\n",
700
+ "Notes: Graded one answer correctky (answer C).\n",
701
+ "\n",
702
+ "**Third query**\n",
703
+ "\n",
704
+ "The first answer would be a B-level answer. While it touches on some important points such as saving time on data cleaning and validation and using models for prediction, it lacks depth and doesn't provide specific examples or applications for businesses.\n",
705
+ "\n",
706
+ "The second answer would be an A-level answer. It provides a clear and concise explanation of how machine learning can automate processes and save companies money in the long run. It also acknowledges the initial costs of implementing machine learning and how it can increase profit margins.\n",
707
+ "\n",
708
+ "The third answer would also be a B-level answer. While it mentions the three main innovations of machine learning (cheap, more accurate, and automated predictions), it doesn't provide specific examples or applications for businesses and lacks depth in its explanation.\n",
709
+ "\n",
710
+ "Notes: None of the answers correct\n",
711
+ "\n",
712
+ "### General comments:\n",
713
+ "\n",
714
+ "None of the query results were consistent. There were also intsances where the output would be something like \"As an AI language model, I do not have the capability to...\". So more training and prompt engineering is certainly needed."
715
+ ],
716
+ "metadata": {
717
+ "id": "FR4YMpPseoTE"
718
+ }
719
+ }
720
+ ],
721
+ "metadata": {
722
+ "colab": {
723
+ "provenance": [],
724
+ "include_colab_link": true
725
+ },
726
+ "kernelspec": {
727
+ "display_name": "Python 3",
728
+ "name": "python3"
729
+ },
730
+ "language_info": {
731
+ "name": "python",
732
+ "version": "3.10.6"
733
+ }
734
+ },
735
+ "nbformat": 4,
736
+ "nbformat_minor": 0
737
+ }
lo-achievement/instructor_vector_store_creator.ipynb ADDED
@@ -0,0 +1,333 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "<a href=\"https://colab.research.google.com/github/vanderbilt-data-science/lo-achievement/blob/main/instructor_vector_store_creator.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "markdown",
12
+ "metadata": {},
13
+ "source": [
14
+ "# Creating a Shared Vector Store (for Instructors)\n",
15
+ "\n",
16
+ "This notebook is for instructors to create a *vector store* which contains all of the information necessary for students to generate their own self-study materials using large language models. It is expected that instructors who will use this notebook know how to run and interact with a Jupyter Notebook, specifically on Google Colab.\n",
17
+ "\n",
18
+ ":::{.callout-info}\n",
19
+ "On Colab, there may be a pop-up saying 'Warning: This notebook was not authored by Google'. In that case, click 'Run anyways'. If you started this notebook from the Vanderbilt Data Science github, then you can trust the code in this notebook.\n",
20
+ ":::"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "markdown",
25
+ "metadata": {},
26
+ "source": [
27
+ "## Setting Up API Access\n",
28
+ "Much of the following code rely on certain *APIs* (application programming interfaces) which have limited access. You will need to get an *API key* for each of those services which will be inserted into the code to let the service know you are an authorized user."
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "markdown",
33
+ "metadata": {},
34
+ "source": [
35
+ "#### OpenAI"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "markdown",
40
+ "metadata": {},
41
+ "source": [
42
+ "First, you will need an **OpenAI API key**. To do this:\n",
43
+ "1. Visit [platform.openai.com/account/api-keys](https://platform.openai.com/account/api-keys) and sign up for an account.\n",
44
+ "2. Click 'Create a secret API key', and give it any name you want.\n",
45
+ "3. Copy the newly created key, either by right-clicking and pressing 'Copy' or using the keyboard shortcut -- Ctrl+C on Windows, Cmd+C on a Mac.\n",
46
+ "\n",
47
+ "Run the following code cell. You'll see a blank text box pop up -- paste your API key there (using the shortcut Ctrl+V on Windows, or Cmd+V if you are using a Mac) and press Enter."
48
+ ]
49
+ },
50
+ {
51
+ "cell_type": "code",
52
+ "execution_count": null,
53
+ "metadata": {},
54
+ "outputs": [],
55
+ "source": [
56
+ "OPENAI_API_KEY = getpass(\"OpenAI API key: \")"
57
+ ]
58
+ },
59
+ {
60
+ "cell_type": "markdown",
61
+ "metadata": {},
62
+ "source": [
63
+ "#### DeepLake\n",
64
+ "\n",
65
+ "Next, you will need to input a **DeepLake API key**, found in the DeepLake dashboard at [app.activeloop.ai](https://app.activeloop.ai).\n",
66
+ "\n",
67
+ "1. Click the link above and create an account.\n",
68
+ "2. After making an account, you will be prompted to set a username. Once you have set your username, copy it, run the code below, paste the username into the text box, and press Enter. (This username will be shared with students.)"
69
+ ]
70
+ },
71
+ {
72
+ "cell_type": "code",
73
+ "execution_count": null,
74
+ "metadata": {},
75
+ "outputs": [],
76
+ "source": [
77
+ "DEEPLAKE_USERNAME = input(\"DeepLake username: \")"
78
+ ]
79
+ },
80
+ {
81
+ "cell_type": "markdown",
82
+ "metadata": {},
83
+ "source": [
84
+ "3. You should then be on the DeepLake dashboard. At the top, click 'Create API token'. You should see an empty table with the columns 'Name', 'Expiration date', and 'Token'.\n",
85
+ "4. Click the 'Create API token' button ![image.png](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAJYAAAAwCAYAAADzRIMRAAALoElEQVR4Xu1caXBT1xX+nvZdssFm8+DYgGPWAVMggFkyMWuJyYBDgRICDZ1JO6HJkNKkAYakEHcgSxsyQybDUkqLW9bWCVsh7GFJ2NywhuASJyQ4GCxZaF973nuWbFmSbWHJjjXv/tLy3r3nnvPdc7577nmP8VOD0AQNxFkDjACsOGtU6I7TgAAsAQgJ0YAArISoVehUAJaAgYRoQABWQtQqdBozsLhNpM8J193P4a78FO6q8/CZr8NruQp4BIW2Ww1IALGmD0S6XEjTBkPaOR+y9KGASA6GYWKeVvOB5ffCY70D+5cb4aooBSQqyDuPhTj9J5AaciFSZ0AkVccsgHDDj0MDPrcVPuttuE3kJO6eg7PyKDkKG2SZU6F89BeQqLvQVk/cbGGbBpbfB5/bDNul92AjUCmynoay12xIO+Y1exDhwvapAfe9C7B/VQLHre1QEbhU/V8k56EjgImanFCjwPJ7HXDeOQZr2VuQ6nO4jiWGR5vsVLgguTTgMX3JORZ3zQ2oBy6GvMsYcl6KRicZFVh+coO2LzfBdnE5NMPfh7LHzOTSljCbmDVgL/8nLKcXQjXoDfJg88AQHYrWIgKLA9XVD+Ao3wbdqLUU9gbHLIRwQ3JqwH3vPMwnfg1FjxlQ9flVVHCFAcvvdcJ2fQMcN/4K/eObhdCXnPho0azY0FhzZC4UOc9ClfschUV5WH+hwCKi7vjuEB4cnQ3DxP2Cp2qR+pP7ZtZzmfZPhHZsCRTdnggj9CHA8rlMMB4ogqr3AoFTJTcu4jI7lnPZrq1HyvgdEMkMIX3WAYvyVJYLK+Gz/cDxKqEJGmiOBli+JVJ1giZvaUieKwgsj+U2qj8aidTJBwRe1RyNCtdwGmD5VvXe8UgtPAmJJiOoFQ5Y7DGN5cIK+F1m6Ia/LahM0EBMGjCf/i0YmY681rLg8Q8PLEqEVpeOhG70OiGjHpNKhYtZDbAZevPxXyJ16slg4pQDlvPOcVjOLkGHwhNx1ZTH/hmc5VfA1j4zkmzKfdDZojSuQySoMyNsN3bB59JCnjUDkY5AHd9sgMdc/3A2HfLMUZBo9WB/9aMctsvHwGjHQpWZHRc52TG9jkFQ5uSh6UOVuAzZ7E7ufzQKmiFvUlZ+NHcPByzLxWKCnR2aoSua3VFjF/rxA8yHhsF5qyL0MlEuVPnHoclJi8s4DTuxX3+JzrW6QzNpEVqCX4/pDzDuWM4tCGnPM0gZOyxMXtNeBq7vw6ch6VYC/aRZBK7NqF7/LJguJejw01lhFz6MrOyYbvNSpMxcASpG+FE1y+fLSFlKaAa9Vgcs44HplOyaC+UjU+MgrAvmoyo4bnoh6bYe2sfnQqqQ0jnTFpj3zIHXVQTDrO2QhefUWjy25VQmnRgMgWHBDsha0Jv5mJhOHZ6AuNNBeKuegeGZzZA1ONjngTUZ2qfXckb2oxLWY4/BfRdQj/VD2bNxYD2MrD9mYNm/LqWk+mZKPeysA9b9HX2hL9gVl92gx1wM47YlYDp+gJSnnkd9e/hst+CXZnHh0HO/GKZ9f4as71p4btGpeef9MIzoD49lL8yHX4Dn3i0qEMqBrMdaaPPJyJy4NlgvvQD75b9QiQd9lQ2CIm8rtP16wXpuJP1+Cn62JkxBYSnnGPRDc6ky4xIeHJ4N153LZHw1JJ3XQjduLiRRKkB8vl0wbp4OpJVA03sPag5vgXKEG9o+oT6CB9b0EBC7KxfBtPtPkOZ+AV3+xageyxJFVq/tIMxHXoan6hIoV00lScW0MH8PWW01UiiwXLCcyYf9phOqEaegzmZIN/PpbHcbpwORZjY0BZugSOV9d80hhkL3ciqDOQ/Hpd2g7BKk3UuhKygMsdHDrkcuG//JNHQoulIHrLubGHT8uSUu9VTO60+i5tPdUD5GxugX3WG7q16BqXQ1/KJU8gzjIc9eDWXWSRi3zoJf+xLUg2dRAeGHsJ/dyBmKA92Dd2DauQ6S3r+j+iALXF+9SADNgGbit2AsL8H6xXt0TwYkGVSklrka6t73CeSD4PX8DKqhL9OO5d+wnSom0G9HyqSiiDzFeXMmao5uhXq0G6qcz1C9JR9+zToipgs4hQW4TTiwbERg+9CqreCAqO5TEhVYbBhsKKsqpxLGnUOpYDKXFttiiJiTcF7bSIuEQl8RhT5aCHXAWgbHuVGwlZ0lPd+Gpl9XWM/0Im/thbzvGsi7+2E/Xwj3/ZnQz/gH5Er+XjZ0izstgSwrHW5Od52gHlcJdebDwqnuPrae694WDdLn8U8Tchzr7noG6Qvi83ih9VweldlcJOX6aZVHFzgALEU9AHL3ftEJ+ln7OGWwzXpxOKz/7Y2UeQQw+u5zV8Frs5GFDfA7ilFD4FTUjtUwvLj+Nwemw8c44KlqUyzOiudgPniNQtgpKPQN5TPC9FEqXNXzg+NxYfGrPOimn+UuVqTw90TjWIz+daRMWw6ROLZQ6LwxDTXH/8UBWp3DL8gAyAMeMwAs5cAy2GjxynLPQ5+fR574Yxg3FUL0SB0f9PkOwFgyAZKcb8lzZ9QDJc/PAt41oLuWQwuoj6O4AyugIPkAfkLRWhBY9QAYzVhAb2ievAqp6A2YPn6dvJyadplu6lpK4LIGQdwQWPz3byKKEAn4AdIu7r4N2mFUlkvNW/MuHhxYAwmReLYFiDwvK+8duRUqzSYvSUWQWQMJVLQAmiDvkWUdCMP80iCfC/QhYT12fn8ezMThqHMw8oUwzFwDKY0V9P4RZiqu3Tw05GeR9N9ScIUDK46hMMBRvMw06It2Ql6vWtlZ8S48XgpL2d2IR/ChsP6KefBpV9ivD4B2+oeQhOynJVSP3Q2WwwwdkteR6YarrqGxHFcmwnz6GlRjTkCeHuqRRarMsNRHY0BkFAs5vRvmkDGDHiuUY9U3TKzA4mUtCwlNAePL80zQ5elrvSQDaa9n4CnfDFGnEhhox8n4NqB64wKIs3dDM7hfCD4YaRpVkavCPVYE/bcEWBFDYTzJOyucjXiWhVw15I9D0XcRZF38xD3egruc8mRqnjP4q8OB5a5aSh7pTTAd1kA/4XniFSbKBa0kYv82lF2ktNskYH1dCP3TpRxg+bBG5dK1Xs9eNgYPzlVBNW475Kk5xBl38pyNQKGbsAoyrRP2r1fRsyCLoOkTmvLw4yTHp3x4Hropr4bwL+fVKbBduczpPRCqIpH3WIDVmKzaJ4gPoowW0lPw2kN5UiDd4KrVMR8Os1Czn0L49wOgGl0KVc+u8FbvowNiKzQjZ3PkPNEeKyJ5j2+6gVevo+JVWE6QER116hanLYG2YCW3y4nmiu035xMoN/G7O7ZJ+tKu5wiX+/La/kYEdy7xLDbhqgSjzobfeCUIrAAw2R2V5JGDSC0ogKvyjzB/8lqdHKIMyPvTbmhIHpfIDLQAn5HVhp36IPE63ofx77/hf0
rjibw5wq4wFmBFlfUgyeqs7YkWprbgP9yiYlsoOIgP1oJJM+EMFJ0vo2bPZEp33AuKIU5bDf2UxQ2Ify3HirPHiphuiHeCNETBzu8oW+yhE/Dw0FP/utDPLtoB3iFSaghmsuv+5/+DtCsklB9r2PxE7qkAFmJ9WojX8dgr6CxUFfZ7dBkS/09kWRube9My8X3aYtR30/02dUXEBGmijnSaEkb4P3k0EPFIRziETh4Dt8VMoh5CC2UzbWGO5BkzatkMO0Wh0C95DN2aM2m00I8TRChNbk17JM1YTZYmszMVHqZIGnu3ykSa9zAF57WEx79axSJJMEhMj3/xEVF4YDUJ7J7QKcT+wGqtOMIj9gm1S7vu/KEfsQ/MWngpSLu2f0KEb/FLQYLgorDIZuWtZauE1xglxFTto9PQ1xi9wj0wEel9DfVnI7x4rX3Ytk2kTNiL10JmI7wqsk2M21qDtv6rIhvMTHi5bWuZupXHabOX27byPIXh2rcGmuZY7Xt+gvRtpAEBWG2k+GQfVgBWslu4jeb3f6EJmcthjHP3AAAAAElFTkSuQmCC) at the right of the page, choose a name for the token, then click 'Create API token'. (You do not need to change the expiration date.)\n",
86
+ "5. Afterwards, you should see the table look something like this:"
87
+ ]
88
+ },
89
+ {
90
+ "cell_type": "markdown",
91
+ "metadata": {},
92
+ "source": [
93
+ "![image.png](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAsMAAABrCAYAAACSXI/oAAAgAElEQVR4Xu3dCVhN6QMG8DctSiZC9p0xzFgG2ZUlFMLIkkGWwYx9+BvZMxhjzSTTDJM9JoPIGMvIln2isa9DlsgMkZSi7f7POffe3K5KqZN7u+99nnlmps7yfb/vfPe85zvfORkphA/4oQAFKEABClCAAhSggAEKGDEMG2Crs8oUoAAFKEABClCAApIAwzAPBApQgAIUoAAFKEABgxVgGDbYpmfFKUABClCAAhSgAAUYhnkMUIACFKAABShAAQoYrADDsME2PStOAQpQgAIUoAAFKMAwzGOAAhSgAAUoQAEKUMBgBRiGDbbpWXEKUIACFKAABShAAYZhHgMUoAAFKEABClCAAgYrwDBssE3PilOAAhSgAAUoQAEKMAzzGKAABShAAQpQgAIUMFgBhmGDbXpWnAIUoAAFKEABClCAYZjHAAUoQAEKUIACFKCAwQowDBts07PiFKAABShAAQpQgAIMwzwGKEABClCAAhSgAAUMVoBh2GCbnhWnAAUoQAEKUIACFGAY5jFAAQpQgAIUoAAFKGCwAgzDBtv0rDgFKEABClCAAhSgAMMwjwEKUIACFKAABShAAYMVYBg22KZnxSlAAQpQgAIUoAAFGIZ5DFCAAhSgAAUoQAEKGKyALGH41vbJWHKxFeZ4dEQxDdpXIV6Y+FdLeI+xNVhwVlz3BcTjd96uyDQFNUIZuHw7Bx3Lv3v5Q9eNwuHiczDBWbNXZH97ydG7MX/GQ3RfPAQfm2V//ayuocAJLB16Dm1WjkS9rK7E5ShgAAJJD7Zgxsw/8TidujYfthJfNEkfgX3KAA4OVlEvBWQJw6fnNsbna56h7oAA/OZRB6Yqmritrmi9wxUhfi56icVCG4aAePwOP/8lAj0dNSpsjCKly8JKfTC/A8W7hmEFLsDDfhRK+hzFmDpAXoXhFPwKt6r7MCBsLTQl0qu6aDY3eT0CPWq+gwxXoYB+CSgSn+HhvzFIFop9wccFPimLsGJMNakSlsUroFih9OuTnT6lXyIsLQX0W0C2MPxFSA18fCcCLdYGYWx9Y0lJOwyLXyhnD+zElUgjWFWyRwe7ijBXeb54cg+vLMog9uLvOPJPAdRq2wUNbGKl5a9GlkQTF0dU1/jCibkbjH1Hw5FYvB46tauTo9Ci303K0udUQAx2o65NTveiTREXiQfPTFGqbBHVRV4CoiL+g6JoBVibCifIx0Yobn0f+wPOIrpQZdg7tUR51XH6KjoCUbBB6SKmUP+35bMT+DvqU7T6tAjS7w/C9h8exuyes1B81mYMbVRSWD8RkeHxKFyhuKq/JODJhf3Yc+EpTLSOf+mkLZSphHUUTuw9igcv3uw7ml6vos9h/85LQtlroE3Xm/imhmYYfr2fAhp1E+uyf143KRD8Mq5+qk1K/B0cUe2zQZe2qCXUmx8K5DeB9C4E1cf+/ReFUb2lE5pWtpCq/WYYjsPj8CdItiotfS8Ar/tYRudExc0gqa8Xqd4OTk1Lpw425TdX1ocCeSkgWxgWw0RAt83oNssG3qHL0FK4nasZhhUIwbTWo3D5077oZZuCgytW4b8WqxA4v5nUuf0GlsWSqw3QsEtXfHjXF2tDyqLex0CFxp2Bw944bDYJB7a6wUr48ji5oD0mBDWE66AGMLmyGb+GNIXv7qmy3kLOy0bivvJWILMwnJIYipmOQ/F8+EEs7W2N8K190OvHBlh/2B2VLnvAwTUIZpVqoH3X5nh5fB0CL9bFokPL0aEooHnSlPZxoCgKm5ZCuy8XYkqP8Az6QzkETJ+AnwOvwqx2M7TpNRuTexzSGLEVjv+5bTBsZyX0G9wZBf72EY7/TzBvny862QCvtMt0eBX+iBqIHXtHQXvGR+z52ejWdwcKt+8Pl4q3seXkFUSHVsD0sDXSyPCx2c3gfqwZXAc0wMujP2HThbbwDf4O1rsmY7L3TvyjqIvmLfrAY243FBa29dmwo6jvOgh1Cl9EwKq/4bBiT+qFcd62KPdGAfkEtMNwUqQ/BjjMQ0ybwehV6z9s/WUnzHtswsapdWCc5m6Lsu9OvjYWm1e7oqTpI2wc6og1sb3g6lwRUUIf2xv3DQKEO6nixCrxnOgT1gBlGzqgm9A/160LRvOFZ/GdYwH5KsctU8BABGQNwyF+ztgxug4WR8/DDqFDm2tNk0hMTISpqXK0SDxpO31ljhXHpqKGquOHdLiPZf0KQIFgjG/4AxrvDURf1QlevWyVu/Pg3DMac4/Ph61q/uSVJa0wO3kDNk2sYCDNyGrmpoByms/TNJs0UpTD2MBgaZpCUuQ69O8UhJ7r7bGx7wkM+3NtavB0HGaMn0/MRC1p7QRsHfExNpT5Q5o+oB2GpzzyxJ6ldqkjOxn1hw+1pkloji61FY7/Tp3CMfXsT2gjHf8J2D+pEebGe+OQdyupX2mWKePbtFFY7lofh22Ppvab5PiVcP3kOIapwrBC6K9JQn8Ve6z21A3NuikQhnmOrlBMP4Vpdsq7QolCObsONYdv0Pg3Qnhuth23RYG8FkgbhpV9fmvVQ6n9KOnJz+jZfC96HNiBfuWVU4/cwnxRZG5ruJ8bBr+Ng1BZ6LvPd38Bp5X2+GPbINWzNuK2miC0UyjmdSkghWH1OVGsI6cm5XVLc3/5WUDmMOwCcSRtqoMbYr86joUFh6fOGVbgEbaN7465wa9gYlEcH9e2xLXLtlivEYb/7hyBH3orT7wz7ZegxRHl3EXN4FxeCNgNFsahZd1Sqe2U8vwqrhacgJOcm5yfj13Z6pbZyLB6p+H+PeE04zrazDqCH/tZp3tBJ/5Q826IdhjWnGObWX/ILAzbpTMP/9X5yegwygq+Ql8SR6s1LzIzCsPa4VYsu/ay93ePQt/pJxFXoBBK1v4EZhevwmHtEekCQbNu4nr9qy3FizZ1hUkhCiWZ0TNcP/oBpl9TjjLzQ4H8IpD2QjDt/H5lHROwun8VXOz6AJ69hb5RdQ+qD7mNgM22+OW0F1qoBnHE7QwLqgbb6papNAkPQvG8mb90MS2GYfU5kWE4vxw9rIeuCMgehsWKxp73gLPrcTj2L4Pt112kuZjxQUNhv7A2tgeNk0aKYs9MROdx1tIJXD0ynNUw3NK/FQK8usBEQ9XIogTKl1DO0+KHAtkReFsYFoPrhoEO+CnMEmaV3bFddRtT++6GuM+I1c7ocewrnFzd5Y2RYc0wnFl/eFsYbhXgghP+rqkjzOK2WnnWx+/CVAjrbIXhEbBeehz/q6/U0gzD7bELw2t7o/HOPzG0ivC7xFOY1NodFZdnFIYD0WG/Jxw0OyUKwSZ1nnN2WoTLUkB3Bd4Mw2n7kQL3sLizI6LHXsVsRyEMV5+BO81Ho1WkD6433Z76kLm4nelP5mLl+LQPoZqq5hMzDOvuMcCS6b9AnoRhkenqMgd0XxYOqybzpTAcF+gG+/UOOCjcErISfq85mpWdMFwt2h9u9r5oqvGgXvzNq/ivYi3p1
hM/FMiuwNvCsDgqPGh3X2zxs0Ngv044322fNH9YDMPtet/GxMN+6CpM51HOL+6Dx25/Y/ngDzINw5n1h8zCsEP0L+jd5De09t8nzccVg/qa/q1x0PZPbBhX4Y3pR5k9zb5rfA3Mj5wvTWkS5yg+Ey5Qu/SOlOYMt8dmDP5wC5xDtqCXMBCugHDHp8V4lE0nDEN4TNBvYHNsr7Y59USvSLyBG3eq4qMP06Tj7DYNl6eAzgloT1c4Mq0ept2bmdqPYoU7NZ36v8D3wrMzzc1ev6HFQTW32GZacOr3R/vP72HyAeW0K/Hz39WrMK1VK3XOMEeGda75WaB8IpBnYVg5mtYKy1LmSmFYDAqePYZgy6s6qFuhIBJe3hFOlm2zPU1CDM4Pgr/B4DH7kFyjCUrFX8DN2EaY/euPcCqXT1qJ1chTgczmDA9O8UCXwfcwUTVPWHxYZmAHXzRZFYSvzGahw5dhqFUuDGfCEpAcH4siTb7HttXKcJnZNInM+0OCNPd+8hErVO24GDsXPEjzyjPx+B8wcg+iLCxglJCAUm0WYsNSJ2mf2qPVmYXh5MgDmNR3OHb9WxSWZgqUammLAr+/wmhpznACTi3rglFrjPBxg/IomPgSdy7dQzfVNInnwmh0yzEhKFTcGV7Hv0cjYVuT3cbiQPzHqF/+BS5fjkNrj02Y3710nrYld0YBuQW0w7B44ffzoJ746WJBmJslIcmoKsb8shlDhItV7f6nvmvqIl3MJuPS6gEY7BWGMnXrweT+aTwtMQBL1/0P9YSZExwZlrsluX1DFpAlDGcHVHyFWoxJGdVrZbKzpvayyldcvbJUv6ImJ9viuhTIvoBm8Cz3jsd1xv0hTut1atrlU76iyTiTd5xmtUaZ9ck3Xy33eqvK1yFWTPOOVfG1a49fWGq8ii6rpeByFNBvgXc/9nOvL+u3IEtPgbwTeO9hOO+qyj1RQF6B9OYMy7tHbp0CFKAABShAgZwKMAznVJDrU0AlkPzsLHYfMUarrnWlefD8UIACFKAABSig+wIMw7rfRiwhBShAAQpQgAIUoIBMAgzDMsFysxSgAAUoQAEKUIACui/AMKz7bcQSUoACFKAABShAAQrIJMAwLBMsN0sBClCAAhSgAAUooPsCDMO630YsIQUoQAEKUIACFKCATAIMwzLBcrMUoAAFKEABClCAArovwDCs+23EElKAAhSgAAUoQAEKyCTAMCwTLDdLAQpQgAIUoAAFKKD7AgzDut9GLCEFKEABClCAAhSggEwCDMMywXKzFKAABShAAQpQgAK6L5BhGH769Knul54lpAAFKEABClCAAhSgQA4EODKcAzyuSgEKUIACFKAABSig3wIMw/rdfiw9BShAAQpQgAIUoEAOBBiGc4DHVSlAAQpQgAIUoAAF9FuAYVi/24+lpwAFKEABClCAAhTIgQDDcA7wuCoFKEABClCAAhSggH4LMAzrd/ux9BSgAAUoQAEKUIACORBgGM4BHlelAAUoQAEKUIACFNBvAYZh/W4/lp4CFKAABShAAQpQIAcCDMM5wOOqFKAABShAAQpQgAL6LcAwrN/tx9JTgAIUoAAFKEABCuRAgGE4B3hclQIUoAAFKEABClBAvwUYhvW7/Vh6ClCAAhSgAAUoQIEcCDAM5wCPq1KAAhSgAAUoQAEK6LcAw7B+tx9LTwEKUIACFKAABSiQAwGG4RzgcVUKUIACFKAABShAAf0WYBjW7/Zj6SlAAQpQgAIUoAAFciDAMJwDPK5KAQpQgAIUoAAFKKDfAgzD+t1+LD0FKEABClCAAhSgQA4EGIZzgMdVKUABClCAAhSgAAX0W4BhWL/bj6WnAAUoQAEKUIACsgnEx8fj5cuX2dq+ubk5LCwssrXO+1yYYfh96nPfFKAABShAAQpQQIcFbt26BfGf7HyqVasG8R99+TAM60tLsZwUoAAFKEABClAgjwXUYbhRo0ZZ2vPp06elIMwwnCWurC4Uh8fhz1CwdFlYmWZ1HS5HAQpQgAIUyH8CN4+ux6m7BdKtWLUW/dGsivJXisRn+OevAwi5Ey/9f+k6XdGqnhV4Gs1/x4QcNRKnRkRERKBs2bLSv8VA3KFDhyztat++fe8UhqOiomBtbZ3hPsQyyTX1Is9Ghl9d9kC7LuGYHrYGjlniBJIi/THE8VuEKixRu+dGbJpaK4trpr+YAhfg0aorYkfdwQ+9lV8WD/9NRrEKxWGeoy1z5fwqcGv7ZCy93xPeY2zfWkVx2d9SvsbUHmXeWDY58gA85xxCnYnfoWN55a8jghfAc19NTJzbDaUz2boi8QGOrvKGV8B+hEcVw0ftBuJbj/6oXuj1SvEP/4TXpAUIuPIcJWv1w5QFY2BX1kS1QALu/eWLZYu34dDtaJSo7Ighc2agVy3lUZ8SfxlrZk/Hr0HhiDEvjSaf/Q/Tv26LUjxrvrXNuQAF8lpg67g6mH+soLDbZMQ/f44U82KwNFNIxejkcQazuwKx5z3R94vluGX8ERrULY2CRlG4cfIykquPhu/m0fjYLK9Lzf3pm4AYTMURXnE0+OnTp7KHYfX+xPBdu3btN7hiYmJw5swZVKxYUZYRZ50Ow2cXNMeXp0fgwFY3WOXCkaQdht8loOdCMbgJPRI4PbcxRl2bjBA/l7eWWlx2bvJ6BHrUTLNsSvwpTOvshm33y2J0QDDG1AGenZmCbv0D8KzEIGw5NhU1Mtx6DDYOboTVRv/DHPdOKG/5EH/OGoHl9wdjx95REHN1cvQWfGE/B6bD12GmswVC14/H7E3V4R26DC2Fk97jbX3RYaElxi6aiHaVLXD/8DSM/+4Jhu/dhYFVzmKa3QDcajkHU0Y2hHXcBayaOAlHyi9F0E8OHEV6a6tzAQq8HwHt85m6FOL3wWD7qYjp+hv85zRIHehJSQzFTMfeOFxuEXYI32fF3k+xuVc9EcjrMCyyXLp0KXU0WjMQq4Ow+FCeGM5NTNQDPbmHKWMYjsOtY3twUrhFI96eaWayGE7pjAzH3A3GvqPheFWoMuydWqK8NNqVgKiI/3DC2wU+KYuwQkgPNqrRW3E09+yBnbgSaQSrSvboYFcxtbMr4iLx4JkpSpUtknoSfxUdgceJ1ihfwgKaXx5LnCMRdup7uA19iFHBC+FgVRqli3AoLPcOrfyxpbRhOA6R4fEonHonIQHPIiJRwEY5hSe9MKzAv1jeuw3215+JGnt8UNbnKEaU+gW9221FsylNsNvHAisyDcPCHYy4OKF/FHp9Uktai88/2oKOwbswSEjDV5a0wvDrU3BghZPquI/Cctf6ONbsFDaME8ec4xAfVwgWqSPJCVjdvwoO2oZKv09MTISp6etj/9X5yegwygq+bylX/mhh1oIC8gqId4VmjZyCgHPPYFLsU7jN74jbEy6gZ+gPaCPtOgGX/L8SLlZDcD/WCEUrtsSUn39E9w9NEHfTD6NHeOKvu3EoYFkJzfvNwLxv7KUgm1EYFr8PPt/THbuCxkkXy5qfxLvz4NQ2EI7+f8G9sbz15tb1W+B9hOH0AnFeBGFxv7KEYfEKdE7nPvB/
UB71mlWH8cML+Ne4OJ5fKoW5qmkSCjzCttEdMeOEDRo2KA+zl/dw9iwwZMMejKx/HYu694P/jRfCabwwipq3wnzhi6ORdOtnNWKqNEc16zjc/TsEkdYj8dv+8dLIWtxWVzTxqocAjZO4ZpjR/PL43nIE7GYcwfPnybAoWhif9tuEdeOr6/fRy9LnuoDm8ZOCX+FWdR8GhK2VpvpIx5P9KJQUAq442pvRyLBCCJuJplcxR2NZMYCm3JgDp6/M3xqGtSslntA6tbuBCZfXwMksCj496+Ja97tY1u/1PMKI1c7oFuSGE/6ub4zuKhCGeY7t8GjITXj1Tjv3ULzYPDTLCbOfzuXIcK4fTdygoQmI3xGz2/ZAaEMf/DSrA2wSz2G9+wj8EFQTP6jOhfeEvuroVRhT/HzxeT1j3Nk7E95/fY4FM23g07kD7vQ+DK+BpWAcfwfrPH9HE/ex0jSH9MOw8kL39xoH3rhDJdorcEUoTyf801V9oWxoLcL6ZlXgfYVhzUBcsmRJiOWQc0RY7SFLGBavTHv622J1sCeaqkajwv17ovO0D7BI9QXwcGsfOC+tBb8DM1PnL4ULP3PxqouNqjCrHS7EkB1y9hM0baya65gYiGH156Daz6GYape9MCzOGeY0iax2C8NdLjfCsPIklDY4iz8Tj7/shmHxInJNfztsLr0OOxc3hYnWPHh1S6V3Yaj+ndgXuy6pDt/j82GrmjsolqVtt434Txh/ruo0D7/92BUZP8ZguMcDa06B7Ai8PP417L5MxMKzP6GNqq9pXlR3kL4X+iBp4iXM66J1YYp7WNy5Nc7YboDnpOaqu6av955eGFb/7KLD3nTDsLi238CyCKh2MMPfZ6d+XDb/CrzPMCyqnjt3Do8ePZLuWtrZ2ckyNUKz9WQIw8qRqqC6QWk6W9rgmfY2rbpACgRjfMNZ+GjjQYwQpl2+OdKmfBDIx/csom0+Qc8BLRA8vHfqA3HZGRlmGM6/nTg3a6ZLYVh8kG7t0HZY8WQ0Vm8bpRodUo70PBuufChUMwzb/doRh7YN0phvn4Aw/wFwmRePMeu2YUh94zeoXkXfwu7vB2HpY3f8vrpLrszVz8324LYooE8C6Z2TNMNwe+FuU/9qK9AsUPksgfZHnCbh/vWPOPRPFIyL1UCXMd74tl9V6W5PRtMk/IdWwJqiW7BPuFhWf5IjQ3A2qgEafngeU1r2xIsRt9LcSdInU5Y1bwTeZxhWT41QKBRISkqS3miR3kN1uSmR62E4ow6qGYalq2HhrQ5bo2xSn4JVV8oIlui1JBju9m+G4avLHODia4OxPrPRuXI0Tqz0wrKAU2jscUsKAgzDuXlocFuigK6EYfEhvPmuQ/FHwW/w68ZBqKzxNPiO0dWxpuQfaS4+90/6BJ5Gftgzv4GqIeNwdIkLxqwtiskBG9BHmI+Y0SdFnJNc4zC+UE0H4ZFAAQq8m0B80FC0mFkBG0/MhPpdSGnD8C4Mr+2Byqq7mxnvJQ7/hq7GyC98UXfJeXzrkHEYjljXDQ4Li8Hz+Bp0Kqrc4jnvtui3siKmTimIRR7RmBGyCT146+fdGtVA1npfYVh7jvC1a9fSfagut5sh18Ow+DCAOGdpU+mANFem2lMS0rt61a6c5siwQrplZIcbn12C77APpEW1gzfDcG4fHoa3PXEqzqHD5WDfvjRMhJFY789b4GSTo9g0sQLEk1j/alvgfGo7+tqIr/7bhqGOXmi49kimc4ZTj1WNOcPiz9JOk9B+OO+1ffzDAHztMgORrVZg5Xy7N54Cl/pW79sYF7QGPYTXqcWe94Bz71AMEt4WMUh456g4orx5fFcsvNYRP/p/h2ZC2dWflLhTmDFkA5rPWyxcYIrTj4TRY7++6OFdE+tOf4e6hncIsMYUyJLAwzMB+KdIN9hLD7oF4aAw8urcqLhwXnqEv7ZdQmnntqhotBvDbSfD3D0Inv1KCSO6qrsz0yywQDVl8Mi0enC/OhqbNg2TLnLF/r79r5bo9Zk1TuzYjZqdPpNecyhud+lnTfBgwG0sEl5uk9HAk7jchoGt4PmgB5at/1b1isU4bBtlC/c/E1B7QNoL5yxVlgsZnMD7CMMZPSyX0VsmcrNRZAjDgDgfuP3Uxxi8YTcmNC4onYx3Te+DiVuqw0v1BRBzfBwcBl+Eq2oZsVLSl0BoG/R1Vr70Je00iQRhrlN1bLTZLM2VFG8TpcTvxUi7kbBwD1OODAe6wVb4kll6+he0F+Yqi+9endJvJE4Ir5IRX42V/qvVbmPSNT84GqV9oj43kbkt/RFIitwhBM+JOPisCMyTY1DoQ833ckbB/ys7zD5mBkuLAvigSlOUiriAFstzHoYrX5+N9p23wM7nIr7TeBG3+oEXv3tpR3KNFOUwNvXWqvA0+uq+cFt4DUaFCyA+tih6zt8Cj8/Eky8gvqKwl++jNxpBeVKsits7xmPgtAN4alYYBZNiEWfyCb5ZtTndaRT605IsKQXkE1D3y72Vl+GkMJ1IHNyZd3MMAg+7o7zUlzeg8cIbUmh9EPwN3L76HY8KWcFcYYqag9sj0ete6p0XReIN/DykD7xDklC4sDFexn+A7nPF/muE38d3h8e+GBSvXAEmD8ORVPFLLFe9JzijMCxdfAvbXDv6Cyw+9FB6D7FJwlMkGlfEJzXiEBbTA7/tdkcNvmtYvgMkH2w5vTBcrFjWXsgnvpc4u3+BTvyDGqdOncrwYTl1IM7udrPaFLKEYfHK9M9pvTHutwiYWFnBOKkQ2g1sgBCfaI0/upEgnYT7TzqA2MKFpSvml/HFhS+BTaknce05w4l3V6KfyyLcKdEYdWye47GiKizu7kDZscowrH6Lhd+dgrAS3nVlYtkGPRqeQsDjb9INwymJuzG66dc4lPgBSjacir1resIiq3JcLl8LvHhyDzEmZdJ93V5mv3t3lATsHT8E/07wk16X9i4f5R+REd6/8k5/rVH5OsMXZiWk1xDyQwEKZC6giItGjGkR6bWKYt+LSSwKK9UD43HR0TAt8voVn+LrDR+HP4Fx8QooWijtW2nUexFfDXr/SQKKaPVfZb+OQYF3ev2ncr+vLJT9Wjw3X79qjJq1irN5KZCpgGYYjhNe7/nw4cNsiZUpUwblypXL1jriX7mrVKlShg/Lve332dqZ1sKyhOG3de60ZVB1VmOrNO8HzrhSyuWTM/xiyN5JPWcBIif0XJcCrwWSHgdizb66GKR6OIY2FKBA/hTQfkVj/qwla6XvApphOLM/kazv9VSXX9YwnF+QWA8KUIACFKBAbggocAJLh55Dm5UjUS83NshtUEAGAXUYzs6mxWkUtra22VlFZ5ZlGNaZpmBBKEABClCAAhSgwPsXEOfwRkREZKsg4h/HyO7UiGztQMaFGYZlxOWmKUABClCAAhSgAAV0W4BhWLfbh6WjAAUoQAEKUIACFJBRgGFYRlxumgIUoAAFKEABClBAtwUYhnW7fVg6ClCAAhSgAAUoQAEZBRiGZcTlpilAAQpQgAIUoAAFdFuAYVi324elowAFKEABClCAAhSQUYBhWEZ
cbpoCFKAABShAAQpQQLcFGIZ1u31YOgpQgAIUoAAFKEABGQUYhmXE5aYpQAEKUIACFKAABXRbgGFYt9uHpaMABShAAQpQgAIUkFGAYVhGXG6aAhSgAAUoQAEKUEC3BRiGdbt9WDoKUIACFKAABShAARkFGIZlxOWmKUABClCAAhSgAAV0W4BhWLfbh6WjAAUoQAEKUIACFJBRgGFYRlxumgIUoAAFKEABClBAtwUYhnW7fVg6ClCAAhSgAAUoQAEZBRiGZcTlpilAAQpQgAIUoAAFdFuAYVi324elowAFKEABClCAAhSQUYBhWEZcbpoCFKAABShAAQpQQLcFGIZ1u31YOgpQgL34LDMAAACaSURBVAIUoAAFKEABGQUYhmXE5aYpQAEKUIACFKAABXRbgGFYt9uHpaMABShAAQpQgAIUkFGAYVhGXG6aAhSgAAUoQAEKUEC3BRiGdbt9WDoKUIACFKAABShAARkFGIZlxOWmKUABClCAAhSgAAV0W4BhWLfbh6WjAAUoQAEKUIACFJBRgGFYRlxumgIUoAAFKEABClBAtwX+D4/nW2ut3arDAAAAAElFTkSuQmCC)"
94
+ ]
95
+ },
96
+ {
97
+ "cell_type": "markdown",
98
+ "metadata": {},
99
+ "source": [
100
+ "6. Click the two overlaid squares ![image.png](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAEEAAAA/CAYAAAC/36X0AAABf0lEQVR4Xu2aPYqFMBhFvwcWKlhoqaWFG9DNuwVXYGGprYUgdjMRMTgyPHRiRpk5AeEJ5u+8e6+R5PWhivzz8gKCCBCUC4AAhCUMUQIQUIJeGGAH7IAdsMP2S4FMuDoT+r6Xqqoe8TlWFIWEYXhoLJcqAQiKORC+geC6rvi+f0iSpg+N4yjTNOlmHmOHNE1lvn6jNE0j87UWICgSQLANYQ6/tTiOI0EQ6Pt9MP5ZO5RlqScdRZHkeQ4EIKAEEeygkgAIQFheCCgBCCiBxRIrRqUBIABhiQKUAASUoF+L2AE7YAfssN03IBPIhPOZEMexJEmyFZK1323bStd1un2rmy9n7GBtxgcaBoKCBIQ7IQzDIHVdHxCr/UeyLPuyRfiux9MnVd5lgv2p2ekBCIorEEwheJ4n81rgjjIfBbpqDWKkhDsmv/a53/gxGQsQTO1gQt+07q1K2B7XMZ2ISf39USGTtk7bwaSzp9YFwk8y4an/psm4UAJKWPSDEoCwKOET7bDhYssL2DsAAAAASUVORK5CYII=) to copy the API key; then run the code below and paste it into the input text box and press Enter."
101
+ ]
102
+ },
103
+ {
104
+ "cell_type": "code",
105
+ "execution_count": null,
106
+ "metadata": {},
107
+ "outputs": [],
108
+ "source": [
109
+ "os.environ['ACTIVELOOP_TOKEN'] = getpass(\"DeepLake API key: \")"
110
+ ]
111
+ },
112
+ {
113
+ "cell_type": "markdown",
114
+ "metadata": {},
115
+ "source": [
116
+ "Finally, pick a name for your dataset. It doesn't matter what this is, but keep in mind that it will be shared with the students."
117
+ ]
118
+ },
119
+ {
120
+ "cell_type": "code",
121
+ "execution_count": null,
122
+ "metadata": {},
123
+ "outputs": [],
124
+ "source": [
125
+ "dataset_name = input(\"Enter a name for your dataset: \")"
126
+ ]
127
+ },
128
+ {
129
+ "cell_type": "markdown",
130
+ "metadata": {},
131
+ "source": [
132
+ "## Processing The Document(s)\n",
133
+ "\n",
134
+ "In this part, you will upload the documents you want the students / model to reference; the embeddings will be created from those documents.\n",
135
+ "\n",
136
+ "**Note: The embeddings of all the documents you share will be publicly available. Do not use this for any documents you want to keep private.**"
137
+ ]
138
+ },
139
+ {
140
+ "cell_type": "markdown",
141
+ "metadata": {},
142
+ "source": [
143
+ "First, upload your documents to Google Colab. To do this:\n",
144
+ "1. Click on the 'file' icon ![image1_3.png](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAFQAAABUCAYAAAAcaxDBAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAALEoAACxKAXd6dE0AAAIKSURBVHhe7do/kmFRFMfxM7MAGyCRKZEI60CA0CJkEjKJHZApAevwJxJhAWyABfTM6T7JdDZ1v6/rvfL7VHV1ndfJ7W9f+ql3f338ZYL5Hd8FoqAwBYUpKExBYQoKU1CYgsIUFKagMAWFKShMQWEKClNQmILCFBSmoDAFhSkoTEFhCgpTUJiCwrCDDs/n01arlW23W3s8HnH1/83nc+v1ejEVDxLUYw4GA7ter3ElTZGjIi/56XSKxXTj8fhzpxdR8g713dloNGJiFXGnJgc9HA42HA5j4k0mE6vX6zExyuWyVSqVmFi5D5qVVquVyR/rbW+bjsfj5z/Sy+USVxhvfR/6er1sNpvFxHj7G3vfqff7PaZ0mbyHNptNa7fbMeWLr/d0OsX0Zb1ec+v1oCn2+/1HtVr952uxWMRP88fX9n29/jtQ9FkepqAwBYUpKExBYQoKU1CYgsIUFKagMAWFKShMQWEKClNQmILCFBSmoDAFhSkoTEFhmTxGzvLsUCp/Bv/9/Cr5GDnXp+9+yvl8tlKpFFOa5Je8L6TT6cRUPL52KqZL3qHOd2m/37fb7RZXiqFWq9lms8lfUOdRl8ul7Xa7pDP2P8Hf47vdro1GIzSmw4LKF902wRQUpqAwBYUpKExBYQoKU1CYgsIUFKagMAWFKShMQWEKClNQmILCFBSmoDAFhSkoyuwPHzLZbD4ZgJMAAAAASUVORK5CYII=) at the bottom of the sidebar to the left of these instructions.\n",
145
+ "2. Click on the 'upload file' icon ![image2_1.png](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAFcAAABaCAYAAADNVsqyAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAALEoAACxKAXd6dE0AAAKKSURBVHhe7dw7bsJAFIVhJx3UtLARaNkILIKakkXAGhAlNSyCmj3QJjnRjOQgY173eO5kzi+h2Fgok08Tg8zAx9dPlaL0GX4qQsIlJlxiwiUmXGLCJSZcYsIlJlxiwiUmXGLCJSZcYsIlJlxiwiUmXGLCJUZ9m+d4PFb7/b46n8/hHm7j8biaz+dhL30U3MvlUq1Wq85Q63kCppwWUsEi/Les1+uwlzZz3MPhkAw25gXYHBd/mIc8AJvjnk6nsJW+1MDmT2iz2Sxs/W2z2YQt2279vnqpnuSKeJ2bagYXgYtSABeDi7oGLgoXdQlcHC7qCrhIXNQFcLG4iA1cNC5iAhePi1jAwg0xgIVbyxpYuFcB2CrhEhMusewvOb4Te6yaucSES0y4xIRLTLjEssLFmgjcmFeyLMsGF6h4iYQb80qWZVngRth6OQC7x22CjXkHdo3bBhvzDOwW9xHYGIC3223Y85NL3GdgY7vd7vdxnnKH+wpsDI/zBOwK9x3YmCdgN7gWsDEvwC5wLWFjHoCT496D7fV6Yau5tuOpgZPjtq1EB9xisQh7zeF4G3DKle7JcbHiGyu/r4uwo9Eo3NMcjt8CTv2xKRfn3Gvg4XBYLZfLu7CxJmAPn0dzgYsiMGABNRgMwpHHqgN7gEVucBFAANTv98M9zwVgzHgPsMgVLnoVNvbsjGfmDvc/JVxiwiUmXGLCJSZcYlrl2JDVWDVziQmXWGenhZxye1rAhZecsxy/Oe50Og1beWY5fnPcyWSS7ezFuDF+qyhPaLhsmBswxotxW0b5RrwYvmoQS41Sf89YW0DFqcByxsaouKWn17nEhEtMuMSES0y4xIRLTLjEhEtMuMSES6uqvgFS+TQXb05HUQAAAABJRU5ErkJggg==) on the left of the Files toolbar.\n",
146
+ "3. Select all of the files you want to upload, then click 'Open'.\n",
147
+ "4. A warning should pop up. Click 'OK' to continue.\n",
148
+ "5. Wait until the spinning circle in the bottom of the 'Files' section disappears. This means that all of the files have been uploaded."
149
+ ]
150
+ },
151
+ {
152
+ "cell_type": "markdown",
153
+ "metadata": {},
154
+ "source": [
155
+ "### Adding YouTube Videos / Websites\n",
156
+ "If you have any websites or YouTube videos which also contain content which you want to put into your data lake, paste those links one at a time into the text box below, pressing 'Enter' after each one. Once you have entered all the links, press 'Enter' without typing anything to finish execution of the code cell.\n",
157
+ "\n",
158
+ "If you have no URLs to add, just click on the box and press 'Enter' without typing anything."
159
+ ]
160
+ },
161
+ {
162
+ "cell_type": "code",
163
+ "execution_count": null,
164
+ "metadata": {},
165
+ "outputs": [],
166
+ "source": [
167
+ "url_list = []\n",
168
+ "while (url := input(\"Enter a YouTube / website link: \")): url_list.append(url)"
169
+ ]
170
+ },
171
+ {
172
+ "cell_type": "markdown",
173
+ "metadata": {},
174
+ "source": [
175
+ "### Model for embeddings\n",
176
+ "\n",
177
+ "Below, you can choose a different model which will be used to create the embeddings. At the current time, only OpenAI models are supported. If you're not sure, the following setting should suffice."
178
+ ]
179
+ },
180
+ {
181
+ "cell_type": "code",
182
+ "execution_count": null,
183
+ "metadata": {},
184
+ "outputs": [],
185
+ "source": [
186
+ "model_name = 'text-embedding-ada-002'"
187
+ ]
188
+ },
189
+ {
190
+ "cell_type": "markdown",
191
+ "metadata": {},
192
+ "source": [
193
+ "## Embedding & Database Creation\n",
194
+ "\n",
195
+ "Now that you've made all of the relevant settings, click the \"Run\" arrow next to this code block, or select this cell and then click \"Run This Cell and All Below\" or \"Run All Below\". This will automatically execute the rest of the code so that your database can be created from your specifications.\n",
196
+ "\n",
197
+ "You can ignore any warnings that pop up, but if the code stops execution, read the error. If you cannot fix it, please contact the developer."
198
+ ]
199
+ },
200
+ {
201
+ "cell_type": "markdown",
202
+ "metadata": {},
203
+ "source": [
204
+ "### Library download and installation"
205
+ ]
206
+ },
207
+ {
208
+ "cell_type": "code",
209
+ "execution_count": null,
210
+ "metadata": {},
211
+ "outputs": [],
212
+ "source": [
213
+ "# run this code if you're using Google Colab or don't have these packages installed in your computing environment\n",
214
+ "#! pip install git+https://<token>@github.com/vanderbilt-data-science/lo-achievement.git\n",
215
+ "#! pip install deeplake"
216
+ ]
217
+ },
218
+ {
219
+ "cell_type": "code",
220
+ "execution_count": null,
221
+ "metadata": {},
222
+ "outputs": [],
223
+ "source": [
224
+ "# basic libraries\n",
225
+ "import os\n",
226
+ "from getpass import getpass\n",
227
+ "from IPython.display import display, Markdown\n",
228
+ "\n",
229
+ "# libraries from our package\n",
230
+ "from ai_classroom_suite.PromptInteractionBase import *\n",
231
+ "from ai_classroom_suite.MediaVectorStores import *\n",
232
+ "\n",
233
+ "# from langchain\n",
234
+ "import deeplake\n",
235
+ "from langchain.vectorstores import DeepLake\n",
236
+ "from langchain.embeddings import OpenAIEmbeddings"
237
+ ]
238
+ },
239
+ {
240
+ "cell_type": "code",
241
+ "execution_count": null,
242
+ "metadata": {},
243
+ "outputs": [],
244
+ "source": [
245
+ "#setup OpenAI API key\n",
246
+ "os.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY\n",
247
+ "openai.api_key = OPENAI_API_KEY"
248
+ ]
249
+ },
250
+ {
251
+ "cell_type": "code",
252
+ "execution_count": null,
253
+ "metadata": {},
254
+ "outputs": [],
255
+ "source": [
256
+ "# get transcripts from youtube URLs\n",
257
+ "yt_docs, yt_save_path = get_website_youtube_text_file(url_list)"
258
+ ]
259
+ },
260
+ {
261
+ "cell_type": "markdown",
262
+ "metadata": {},
263
+ "source": [
264
+ "Now, we'll create the embeddings and the vector store from the transcripts of the YouTube videos. Make sure that all your documents are shown in the output from the previous code cell, then continue execution."
265
+ ]
266
+ },
267
+ {
268
+ "cell_type": "code",
269
+ "execution_count": null,
270
+ "metadata": {},
271
+ "outputs": [],
272
+ "source": [
273
+ "# create document segments\n",
274
+ "doc_segments = rawtext_to_doc_split(yt_docs)"
275
+ ]
276
+ },
277
+ {
278
+ "cell_type": "markdown",
279
+ "metadata": {},
280
+ "source": [
281
+ "Make sure that all of your documents are shown in the output from the previous code cell, then continue execution."
282
+ ]
283
+ },
284
+ {
285
+ "cell_type": "code",
286
+ "execution_count": null,
287
+ "metadata": {},
288
+ "outputs": [],
289
+ "source": [
290
+ "# create embeddings\n",
291
+ "embeddings = OpenAIEmbeddings(model=model_name)\n",
292
+ "\n",
293
+ "### Dataset Creation ###\n",
294
+ "dataset_path = f\"hub://{DEEPLAKE_USERNAME}/{dataset_name}\"\n",
295
+ "db = DeepLake.from_documents(all_document_segments, dataset_path=dataset_path,\n",
296
+ " embedding=embeddings, public=True)"
297
+ ]
298
+ },
299
+ {
300
+ "cell_type": "markdown",
301
+ "metadata": {},
302
+ "source": [
303
+ "## Sharing With Students"
304
+ ]
305
+ },
306
+ {
307
+ "cell_type": "code",
308
+ "execution_count": null,
309
+ "metadata": {},
310
+ "outputs": [],
311
+ "source": [
312
+ "display(Markdown(f'''To let students access the repository, give them the following URL:\n",
313
+ "\n",
314
+ "`{dataset_path}`'''))"
315
+ ]
316
+ },
317
+ {
318
+ "cell_type": "markdown",
319
+ "metadata": {},
320
+ "source": [
321
+ "Distribute the URL above to students. They will copy and paste it into the LLM learning application, which then allows their models to use all of the documents you uploaded as reference sources when responding to or creating questions."
322
+ ]
323
+ }
324
+ ],
325
+ "metadata": {
326
+ "kernelspec": {
327
+ "display_name": "python3",
328
+ "name": "python3"
329
+ }
330
+ },
331
+ "nbformat": 4,
332
+ "nbformat_minor": 0
333
+ }
lo-achievement/nbs/_quarto.yml ADDED
@@ -0,0 +1,20 @@
1
+ project:
2
+ type: website
3
+
4
+ format:
5
+ html:
6
+ theme: cosmo
7
+ css: styles.css
8
+ toc: true
9
+
10
+ website:
11
+ twitter-card: true
12
+ open-graph: true
13
+ repo-actions: [issue]
14
+ navbar:
15
+ background: primary
16
+ search: true
17
+ sidebar:
18
+ style: floating
19
+
20
+ metadata-files: [nbdev.yml, sidebar.yml]
lo-achievement/nbs/gradio_application.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
lo-achievement/nbs/helper_utilities.ipynb ADDED
@@ -0,0 +1,405 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# helper_utilities.ipynb\n",
8
+ "> Helper functions for when we need to work with files in Google Colab or locally\n",
9
+ "\n",
10
+ "In this notebook, we write some generic code to help us interface more easily with loading in files."
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "raw",
15
+ "metadata": {},
16
+ "source": [
17
+ "---\n",
18
+ "skip_exec: true\n",
19
+ "---"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": null,
25
+ "metadata": {},
26
+ "outputs": [],
27
+ "source": [
28
+ "#| default_exp IOHelperUtilities"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "execution_count": null,
34
+ "metadata": {},
35
+ "outputs": [],
36
+ "source": [
37
+ "#| export\n",
38
+ "import ipywidgets as widgets\n",
39
+ "from IPython.display import display, clear_output\n",
40
+ "from functools import partial\n",
41
+ "from ipyfilechooser import FileChooser\n",
42
+ "import os"
43
+ ]
44
+ },
45
+ {
46
+ "cell_type": "code",
47
+ "execution_count": null,
48
+ "metadata": {},
49
+ "outputs": [],
50
+ "source": [
51
+ "#| export\n",
52
+ "def check_is_colab():\n",
53
+ " \"\"\"\n",
54
+ " Check if the current environment is Google Colab.\n",
55
+ " \"\"\"\n",
56
+ " try:\n",
57
+ " import google.colab\n",
58
+ " return True\n",
59
+ " except:\n",
60
+ " return False"
61
+ ]
62
+ },
63
+ {
64
+ "cell_type": "code",
65
+ "execution_count": null,
66
+ "metadata": {},
67
+ "outputs": [],
68
+ "source": [
69
+ "assert not check_is_colab(), 'On this system, we should not be in Colab'"
70
+ ]
71
+ },
72
+ {
73
+ "cell_type": "markdown",
74
+ "metadata": {},
75
+ "source": [
76
+ "## File Choosers\n",
77
+ "Jupyter notebooks, the different IDEs, and ipywidgets currently (as of August 8, 2023) are not playing nice together, and also are misaligned in terms of versions. What works in Jupyter Lab at version 8 somehow doesn't work in Google Colab and changes are needed. Neither the Google Colab version or the Jupyter Lab version work with VSCode.\n",
78
+ "\n",
79
+ "While this is being worked out between VS Code developers and ipywidgets, we've found a mid-term solutions which requires another package. We implement and test this below (thanks, Code Interpreter!)"
80
+ ]
81
+ },
82
+ {
83
+ "cell_type": "code",
84
+ "execution_count": null,
85
+ "metadata": {},
86
+ "outputs": [],
87
+ "source": [
88
+ "#| export\n",
89
+ "class MultiFileChooser:\n",
90
+ " def __init__(self):\n",
91
+ " self.fc = FileChooser('.')\n",
92
+ " self.fc.title = \"Use the following file chooser to add each file individually.\\n You can remove files by clicking the remove button.\"\n",
93
+ " self.fc.use_dir_icons = True\n",
94
+ " self.fc.show_only_dirs = False\n",
95
+ " self.selected_files = []\n",
96
+ " \n",
97
+ " self.fc.register_callback(self.file_selected)\n",
98
+ " \n",
99
+ " self.output = widgets.Output()\n",
100
+ " \n",
101
+ " def file_selected(self, chooser):\n",
102
+ " if self.fc.selected is not None and self.fc.selected not in self.selected_files:\n",
103
+ " self.selected_files.append(self.fc.selected)\n",
104
+ " self.update_display()\n",
105
+ " \n",
106
+ " def update_display(self):\n",
107
+ " with self.output:\n",
108
+ " clear_output()\n",
109
+ " for this_file in self.selected_files:\n",
110
+ " remove_button = widgets.Button(description=\"Remove\", tooltip=\"Remove this file\")\n",
111
+ " remove_button.on_click(partial(self.remove_file, file=this_file))\n",
112
+ " display(widgets.HBox([widgets.Label(value=this_file), remove_button]))\n",
113
+ " \n",
114
+ " def remove_file(self, button, this_file):\n",
115
+ " if this_file in self.selected_files:\n",
116
+ " self.selected_files.remove(this_file)\n",
117
+ " self.update_display()\n",
118
+ " \n",
119
+ " def display(self):\n",
120
+ " display(self.fc, self.output)\n",
121
+ " \n",
122
+ " def get_selected_files(self):\n",
123
+ " return self.selected_files"
124
+ ]
125
+ },
126
+ {
127
+ "cell_type": "markdown",
128
+ "metadata": {},
129
+ "source": [
130
+ "Now we test the file chooser very briefly to ensure that the results are as we desire."
131
+ ]
132
+ },
133
+ {
134
+ "cell_type": "code",
135
+ "execution_count": null,
136
+ "metadata": {},
137
+ "outputs": [
138
+ {
139
+ "data": {
140
+ "application/vnd.jupyter.widget-view+json": {
141
+ "model_id": "07ff01c3633c4addb72bfe758fb9c4e4",
142
+ "version_major": 2,
143
+ "version_minor": 0
144
+ },
145
+ "text/plain": [
146
+ "FileChooser(path='/workspaces/lo-achievement/nbs', filename='', title='Use the following file chooser to add e…"
147
+ ]
148
+ },
149
+ "metadata": {},
150
+ "output_type": "display_data"
151
+ },
152
+ {
153
+ "data": {
154
+ "application/vnd.jupyter.widget-view+json": {
155
+ "model_id": "1aa36e43d7254a2f8669a92e156b726c",
156
+ "version_major": 2,
157
+ "version_minor": 0
158
+ },
159
+ "text/plain": [
160
+ "Output()"
161
+ ]
162
+ },
163
+ "metadata": {},
164
+ "output_type": "display_data"
165
+ }
166
+ ],
167
+ "source": [
168
+ "# Create file chooser and interact\n",
169
+ "mfc = MultiFileChooser()\n",
170
+ "mfc.display()"
171
+ ]
172
+ },
173
+ {
174
+ "cell_type": "code",
175
+ "execution_count": null,
176
+ "metadata": {},
177
+ "outputs": [
178
+ {
179
+ "data": {
180
+ "text/plain": [
181
+ "['/workspaces/lo-achievement/nbs/_quarto.yml',\n",
182
+ " '/workspaces/lo-achievement/nbs/nbdev.yml']"
183
+ ]
184
+ },
185
+ "execution_count": null,
186
+ "metadata": {},
187
+ "output_type": "execute_result"
188
+ }
189
+ ],
190
+ "source": [
191
+ "# get files that were selected.\n",
192
+ "mfc.get_selected_files()"
193
+ ]
194
+ },
195
+ {
196
+ "cell_type": "markdown",
197
+ "metadata": {},
198
+ "source": [
199
+ "## File loading\n",
200
+ "Now, we implement a file chooser that will work across platforms, whether it be Google Colab or local environments."
201
+ ]
202
+ },
203
+ {
204
+ "cell_type": "code",
205
+ "execution_count": null,
206
+ "metadata": {},
207
+ "outputs": [],
208
+ "source": [
209
+ "#| export\n",
210
+ "def setup_drives(upload_set):\n",
211
+ "\n",
212
+ " upload_set = upload_set.lower()\n",
213
+ " uploaded = None\n",
214
+ "\n",
215
+ " # allow them to mount the drive if they chose Google Colab.\n",
216
+ " if upload_set == 'google drive':\n",
217
+ " if check_is_colab():\n",
218
+ " from google.colab import drive\n",
219
+ " drive.mount('/content/drive')\n",
220
+ " else:\n",
221
+ " raise ValueError(\"It looks like you're not on Google Colab. Google Drive mounting is currently only implemented for Google Colab.\")\n",
222
+ "\n",
223
+ " # Everything else means that they'll need to use a file chooser (including Google Drive)\n",
224
+ " if check_is_colab():\n",
225
+ " from google.colab import files\n",
226
+ " uploaded = files.upload()\n",
227
+ " else:\n",
228
+ " # Create file chooser and interact\n",
229
+ " mfc = MultiFileChooser()\n",
230
+ " mfc.display()\n",
231
+ " uploaded = mfc.get_selected_files()\n",
232
+ " \n",
233
+ " return uploaded"
234
+ ]
235
+ },
236
+ {
237
+ "cell_type": "code",
238
+ "execution_count": null,
239
+ "metadata": {},
240
+ "outputs": [
241
+ {
242
+ "data": {
243
+ "application/vnd.jupyter.widget-view+json": {
244
+ "model_id": "c88d8353fb0d4dc6a2df946ea2082e5f",
245
+ "version_major": 2,
246
+ "version_minor": 0
247
+ },
248
+ "text/plain": [
249
+ "FileChooser(path='/workspaces/lo-achievement/nbs', filename='', title='Use the following file chooser to add e…"
250
+ ]
251
+ },
252
+ "metadata": {},
253
+ "output_type": "display_data"
254
+ },
255
+ {
256
+ "data": {
257
+ "application/vnd.jupyter.widget-view+json": {
258
+ "model_id": "9b8ed072582b4e87ace35d2d59c3a82f",
259
+ "version_major": 2,
260
+ "version_minor": 0
261
+ },
262
+ "text/plain": [
263
+ "Output()"
264
+ ]
265
+ },
266
+ "metadata": {},
267
+ "output_type": "display_data"
268
+ }
269
+ ],
270
+ "source": [
271
+ "res = setup_drives('local drive')"
272
+ ]
273
+ },
274
+ {
275
+ "cell_type": "code",
276
+ "execution_count": null,
277
+ "metadata": {},
278
+ "outputs": [
279
+ {
280
+ "data": {
281
+ "text/plain": [
282
+ "['/workspaces/lo-achievement/nbs/_quarto.yml',\n",
283
+ " '/workspaces/lo-achievement/nbs/nbdev.yml']"
284
+ ]
285
+ },
286
+ "execution_count": null,
287
+ "metadata": {},
288
+ "output_type": "execute_result"
289
+ }
290
+ ],
291
+ "source": [
292
+ "res"
293
+ ]
294
+ },
295
+ {
296
+ "cell_type": "markdown",
297
+ "metadata": {},
298
+ "source": [
299
+ "Now, we'll verify the behavior of Google Drive. We'll wrap this in a try/except block so the code can run all the way through."
300
+ ]
301
+ },
302
+ {
303
+ "cell_type": "code",
304
+ "execution_count": null,
305
+ "metadata": {},
306
+ "outputs": [
307
+ {
308
+ "name": "stdout",
309
+ "output_type": "stream",
310
+ "text": [
311
+ "An exception of type ValueError occurred. Arguments:\n",
312
+ "It looks like you're not on Google Colab. Google Drive mounting is currently only implemented for Google Colab.\n"
313
+ ]
314
+ }
315
+ ],
316
+ "source": [
317
+ "try:\n",
318
+ " setup_drives('google drive')\n",
319
+ "except Exception as e:\n",
320
+ " print(f\"An exception of type {type(e).__name__} occurred. Arguments:\\n{e}\")"
321
+ ]
322
+ },
323
+ {
324
+ "cell_type": "markdown",
325
+ "metadata": {},
326
+ "source": [
327
+ "## Future expected implementation\n",
328
+ "\n",
329
+ "The following code is included as it works, just not in Visual Studio code. The current implementation of the File chooser is a bit inelegant, but this is due to the current limitations of the combination of the libraries and platforms. Once some errors with VS code can be updated, this code will be the preferable solution as it is more familiar to users."
330
+ ]
331
+ },
332
+ {
333
+ "cell_type": "code",
334
+ "execution_count": null,
335
+ "metadata": {},
336
+ "outputs": [],
337
+ "source": [
338
+ "import ipywidgets as widgets\n",
339
+ "from IPython.display import display\n",
340
+ "\n",
341
+ "class UniversalFileUpload:\n",
342
+ "\n",
343
+ " def __init__(self):\n",
344
+ " self.filelist = []\n",
345
+ " self.uploader = None\n",
346
+ " self.status_output = None\n",
347
+ " \n",
348
+ " def _process_upload(self, change):\n",
349
+ " self.status_output.clear_output()\n",
350
+ " with self.status_output:\n",
351
+ " print('What is happening?')\n",
352
+ " print(change)\n",
353
+ "\n",
354
+ " def process_uploads(self, change):\n",
355
+ " if change['new'] and change['new'] != None:\n",
356
+ " with self.status_output:\n",
357
+ " print(change)\n",
358
+ " \n",
359
+ " self.filelist = change['new']\n",
360
+ " \n",
361
+ " #get filenames and promt\n",
362
+ " fnames = [fileinfo['name'] for fileinfo in self.filelist['metadata']]\n",
363
+ " with self.status_output:\n",
364
+ " print('Uploaded files:', fnames)\n",
365
+ " \n",
366
+ " #clear it so it doesn't save state\n",
367
+ " self.uploader.close()\n",
368
+ " \n",
369
+ " def get_upload_value(self):\n",
370
+ " return self.filelist\n",
371
+ " \n",
372
+ " def choose_files(self):\n",
373
+ " self.uploader = widgets.FileUpload(accept='', multiple=True, description='cat')\n",
374
+ " self.status_output = widgets.Output()\n",
375
+ " self.file_output_box = widgets.VBox([self.uploader, self.status_output])\n",
376
+ " self.uploader.observe(self._process_upload)\n",
377
+ "\n",
378
+ " with self.status_output:\n",
379
+ " print('Waiting...')\n",
380
+ "\n",
381
+ " return self.file_output_box"
382
+ ]
383
+ },
384
+ {
385
+ "cell_type": "code",
386
+ "execution_count": null,
387
+ "metadata": {},
388
+ "outputs": [],
389
+ "source": [
390
+ "#test\n",
391
+ "ul = UniversalFileUpload()\n",
392
+ "ul.choose_files()"
393
+ ]
394
+ }
395
+ ],
396
+ "metadata": {
397
+ "kernelspec": {
398
+ "display_name": "python3",
399
+ "language": "python",
400
+ "name": "python3"
401
+ }
402
+ },
403
+ "nbformat": 4,
404
+ "nbformat_minor": 2
405
+ }
lo-achievement/nbs/media_stores.ipynb ADDED
@@ -0,0 +1,920 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# media_stores.ipynb\n",
8
+ "> A notebook for storing all types of media as vector stores\n",
9
+ "\n",
10
+ "In this notebook, we'll implement the functionality required to interact with many types of media stores. This is - not just for text files and pdfs, but also for images, audio, and video.\n",
11
+ "\n",
12
+ "Below are some references for integration of different media types into vector stores.\n",
13
+ "\n",
14
+ "- YouTube: https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/youtube_audio\n",
15
+ "- Websites:\n",
16
+ " - https://js.langchain.com/docs/modules/indexes/document_loaders/examples/web_loaders/\n",
17
+ " - https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/web_base\n",
18
+ " - Extracting relevant information from website: https://www.oncrawl.com/technical-seo/extract-relevant-text-content-from-html-page/\n",
19
+ "\n",
20
+ ":::{.callout-caution}\n",
21
+ "These notebooks are development notebooks, meaning that they are meant to be run locally or somewhere that supports navigating a full repository (in other words, not Google Colab unless you clone the entire repository to drive and then mount the Drive-Repository.) However, it is expected if you're able to do all of those steps, you're likely also able to figure out the required pip installs for development there.\n",
22
+ ":::\n"
23
+ ]
24
+ },
25
+ {
26
+ "cell_type": "raw",
27
+ "metadata": {},
28
+ "source": [
29
+ "---\n",
30
+ "skip_exec: true\n",
31
+ "---"
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "code",
36
+ "execution_count": null,
37
+ "metadata": {},
38
+ "outputs": [],
39
+ "source": [
40
+ "#| default_exp MediaVectorStores"
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "execution_count": null,
46
+ "metadata": {},
47
+ "outputs": [],
48
+ "source": [
49
+ "#| export\n",
50
+ "# import libraries here\n",
51
+ "import os\n",
52
+ "import itertools\n",
53
+ "\n",
54
+ "from langchain.embeddings import OpenAIEmbeddings\n",
55
+ "\n",
56
+ "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
57
+ "from langchain.document_loaders.unstructured import UnstructuredFileLoader\n",
58
+ "from langchain.document_loaders.generic import GenericLoader\n",
59
+ "from langchain.document_loaders.parsers import OpenAIWhisperParser\n",
60
+ "from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader\n",
61
+ "from langchain.document_loaders import WebBaseLoader, UnstructuredURLLoader\n",
62
+ "from langchain.docstore.document import Document\n",
63
+ "\n",
64
+ "from langchain.vectorstores import Chroma\n",
65
+ "from langchain.chains import RetrievalQAWithSourcesChain"
66
+ ]
67
+ },
68
+ {
69
+ "cell_type": "markdown",
70
+ "metadata": {},
71
+ "source": [
72
+ "Note that we will not export the following packages to our module because in this exploration we have decided to go with langchain implementations, or they are only used for testing."
73
+ ]
74
+ },
75
+ {
76
+ "cell_type": "code",
77
+ "execution_count": null,
78
+ "metadata": {},
79
+ "outputs": [],
80
+ "source": [
81
+ "#exploration\n",
82
+ "import trafilatura\n",
83
+ "import requests\n",
84
+ "import justext"
85
+ ]
86
+ },
87
+ {
88
+ "cell_type": "markdown",
89
+ "metadata": {},
90
+ "source": [
91
+ "## Media to Text Converters\n",
92
+ "In this section, we provide a set of converters that can either read text and convert it to other useful text, or read YouTube or Websites and convert them into text."
93
+ ]
94
+ },
95
+ {
96
+ "cell_type": "markdown",
97
+ "metadata": {},
98
+ "source": [
99
+ "### Standard Text Splitter\n",
100
+ "Here we define a standard text splitter. This can be used on any text."
101
+ ]
102
+ },
103
+ {
104
+ "cell_type": "code",
105
+ "execution_count": null,
106
+ "metadata": {},
107
+ "outputs": [],
108
+ "source": [
109
+ "#| export\n",
110
+ "def rawtext_to_doc_split(text, chunk_size=1500, chunk_overlap=150):\n",
111
+ " \n",
112
+ " # Quick type checking\n",
113
+ " if not isinstance(text, list):\n",
114
+ " text = [text]\n",
115
+ "\n",
116
+ " # Create splitter\n",
117
+ " text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size,\n",
118
+ " chunk_overlap=chunk_overlap,\n",
119
+ " add_start_index = True)\n",
120
+ " \n",
121
+ " #Split into docs segments\n",
122
+ " if isinstance(text[0], Document):\n",
123
+ " doc_segments = text_splitter.split_documents(text)\n",
124
+ " else:\n",
125
+ " doc_segments = text_splitter.split_documents(text_splitter.create_documents(text))\n",
126
+ "\n",
127
+ " # Make into one big list\n",
128
+ " doc_segments = list(itertools.chain(*doc_segments)) if isinstance(doc_segments[0], list) else doc_segments\n",
129
+ "\n",
130
+ " return doc_segments"
131
+ ]
132
+ },
133
+ {
134
+ "cell_type": "code",
135
+ "execution_count": null,
136
+ "metadata": {},
137
+ "outputs": [
138
+ {
139
+ "data": {
140
+ "text/plain": [
141
+ "[Document(page_content='This is a', metadata={}),\n",
142
+ " Document(page_content='sentence.', metadata={}),\n",
143
+ " Document(page_content='This is', metadata={}),\n",
144
+ " Document(page_content='another', metadata={}),\n",
145
+ " Document(page_content='sentence.', metadata={}),\n",
146
+ " Document(page_content='This is a', metadata={}),\n",
147
+ " Document(page_content='a third', metadata={}),\n",
148
+ " Document(page_content='sentence.', metadata={})]"
149
+ ]
150
+ },
151
+ "execution_count": null,
152
+ "metadata": {},
153
+ "output_type": "execute_result"
154
+ }
155
+ ],
156
+ "source": [
157
+ "# test basic functionality\n",
158
+ "rawtext_to_doc_split([\"This is a sentence. This is another sentence.\", \"This is a third sentence.\"], chunk_size=10, chunk_overlap=5)"
159
+ ]
160
+ },
161
+ {
162
+ "cell_type": "markdown",
163
+ "metadata": {},
164
+ "source": [
165
+ "We'll write a quick function to do a unit test on the function we just wrote."
166
+ ]
167
+ },
168
+ {
169
+ "cell_type": "code",
170
+ "execution_count": null,
171
+ "metadata": {},
172
+ "outputs": [],
173
+ "source": [
174
+ "def test_split_texts():\n",
175
+ " \n",
176
+ " # basic behavior\n",
177
+ " text = \"This is a sample text that we will use to test the splitter function.\"\n",
178
+ " expected_output = [\"This is a sample text that we will use to test the splitter function.\"]\n",
179
+ " out_splits = [doc.page_content for doc in rawtext_to_doc_split(text)]\n",
180
+ " assert all([target==expected for target, expected in zip(expected_output, out_splits)]), ('The basic splitter functionality is incorrect, and does not correctly ' +\n",
181
+ " 'use chunk_size and chunk_overlap on chunks <1500.')\n",
182
+ " \n",
183
+ " # try a known result with variable chunk_length and chunk_overlap\n",
184
+ " text = (\"This is a sample text that we will use to test the splitter function. It should split the \" +\n",
185
+ " \"text into multiple chunks of size 1500 with an overlap of 150 characters. This is the second chunk.\")\n",
186
+ " expected_output = ['This is a sample text that we will use to test the',\n",
187
+ " 'test the splitter function. It should split the',\n",
188
+ " 'split the text into multiple chunks of size 1500',\n",
189
+ " 'size 1500 with an overlap of 150 characters. This',\n",
190
+ " 'This is the second chunk.']\n",
191
+ " out_splits = [doc.page_content for doc in rawtext_to_doc_split(text, 50, 10)]\n",
192
+ " assert all([target==expected for target, expected in zip(expected_output, out_splits)]), 'The splitter does not correctly use chunk_size and chunk_overlap.'\n",
193
+ "\n",
194
+ "# Run test\n",
195
+ "test_split_texts()"
196
+ ]
197
+ },
198
+ {
199
+ "cell_type": "markdown",
200
+ "metadata": {},
201
+ "source": [
202
+ "The following function is used for testing to make sure single files and lists can be accommodated, and that what are returned are lists of documents."
203
+ ]
204
+ },
205
+ {
206
+ "cell_type": "code",
207
+ "execution_count": null,
208
+ "metadata": {},
209
+ "outputs": [],
210
+ "source": [
211
+ "# a set of tests to make sure that this works on both lists single inputs\n",
212
+ "def test_converters_inputs(test_fcn, files_list=None):\n",
213
+ " if files_list is None:\n",
214
+ " single_file = 'The cat was super cute and adorable'\n",
215
+ " multiple_files = [single_file, 'The dog was also cute and her wet nose is always so cold!']\n",
216
+ " elif isinstance(files_list, str):\n",
217
+ " single_file = files_list\n",
218
+ " multiple_files = [single_file, single_file]\n",
219
+ " elif isinstance(files_list, list):\n",
220
+ " single_file = files_list[0]\n",
221
+ " multiple_files = files_list\n",
222
+ " else:\n",
223
+ " TypeError(\"You've passed in a files_list which is neither a string or a list or None\")\n",
224
+ "\n",
225
+ " # test for single file\n",
226
+ " res = test_fcn(single_file)\n",
227
+ " assert isinstance(res, list), 'FAILED ASSERT in {test_fcn}. A single file should return a list.'\n",
228
+ " assert not isinstance(res[0], list), 'FAILED ASSERT in {test_fcn}. A single file should return a 1-dimensional list.'\n",
229
+ "\n",
230
+ " # test for multiple files\n",
231
+ " res = test_fcn(multiple_files)\n",
232
+ " assert isinstance(res, list), 'FAILED ASSERT in {test_fcn}. A list of files should return a list.'\n",
233
+ " assert not isinstance(res[0], list), 'FAILED ASSERT in {test_fcn}. A list of files should return a 1-dimensional list with all documents combined.'\n",
234
+ "\n",
235
+ " # test that the return type of elements should be Document\n",
236
+ " assert all([isinstance(doc, Document) for doc in res]), 'FAILED ASSERT in {test_fcn}. The return type of elements should be Document.'"
237
+ ]
238
+ },
239
+ {
240
+ "cell_type": "code",
241
+ "execution_count": null,
242
+ "metadata": {},
243
+ "outputs": [],
244
+ "source": [
245
+ "# test behavior of standard text splitter\n",
246
+ "test_converters_inputs(rawtext_to_doc_split)"
247
+ ]
248
+ },
249
+ {
250
+ "cell_type": "markdown",
251
+ "metadata": {},
252
+ "source": [
253
+ "### File or Files\n",
254
+ "Functions which load a single file or files from a directory, including pdfs, text files, html, images, and more. See [Unstructured File Documentation](https://python.langchain.com/docs/integrations/document_loaders/unstructured_file) for more information."
255
+ ]
256
+ },
257
+ {
258
+ "cell_type": "code",
259
+ "execution_count": null,
260
+ "metadata": {},
261
+ "outputs": [],
262
+ "source": [
263
+ "#| export\n",
264
+ "## A single File\n",
265
+ "def _file_to_text(single_file, chunk_size = 1000, chunk_overlap=150):\n",
266
+ "\n",
267
+ " # Create loader and get segments\n",
268
+ " loader = UnstructuredFileLoader(single_file)\n",
269
+ " doc_segments = loader.load_and_split(RecursiveCharacterTextSplitter(chunk_size=chunk_size,\n",
270
+ " chunk_overlap=chunk_overlap,\n",
271
+ " add_start_index=True))\n",
272
+ " return doc_segments\n",
273
+ "\n",
274
+ "\n",
275
+ "## Multiple files\n",
276
+ "def files_to_text(files_list, chunk_size=1000, chunk_overlap=150):\n",
277
+ " \n",
278
+ " # Quick type checking\n",
279
+ " if not isinstance(files_list, list):\n",
280
+ " files_list = [files_list]\n",
281
+ "\n",
282
+ " # This is currently a fix because the UnstructuredFileLoader expects a list of files yet can't split them correctly yet\n",
283
+ " all_segments = [_file_to_text(single_file, chunk_size=chunk_size, chunk_overlap=chunk_overlap) for single_file in files_list]\n",
284
+ " all_segments = list(itertools.chain(*all_segments)) if isinstance(all_segments[0], list) else all_segments\n",
285
+ "\n",
286
+ " return all_segments"
287
+ ]
288
+ },
289
+ {
290
+ "cell_type": "code",
291
+ "execution_count": null,
292
+ "metadata": {},
293
+ "outputs": [
294
+ {
295
+ "data": {
296
+ "text/plain": [
297
+ "[Document(page_content='Two roads diverged in a yellow wood,\\rAnd sorry I could not travel both\\rAnd be one traveler, long I', metadata={'source': '../roadnottaken.txt', 'start_index': 0}),\n",
298
+ " Document(page_content='traveler, long I stood\\rAnd looked down one as far as I could\\rTo where it bent in the', metadata={'source': '../roadnottaken.txt', 'start_index': 82}),\n",
299
+ " Document(page_content='it bent in the undergrowth;\\r\\rThen took the other, as just as fair,\\rAnd having perhaps the better', metadata={'source': '../roadnottaken.txt', 'start_index': 152}),\n",
300
+ " Document(page_content='perhaps the better claim,\\rBecause it was grassy and wanted wear;\\rThough as for that the passing', metadata={'source': '../roadnottaken.txt', 'start_index': 230}),\n",
301
+ " Document(page_content='that the passing there\\rHad worn them really about the same,\\r\\rAnd both that morning equally lay\\rIn', metadata={'source': '../roadnottaken.txt', 'start_index': 309}),\n",
302
+ " Document(page_content='equally lay\\rIn leaves no step had trodden black. Oh, I kept the first for another day! Yet knowing', metadata={'source': '../roadnottaken.txt', 'start_index': 392}),\n",
303
+ " Document(page_content='day! Yet knowing how way leads on to way,\\rI doubted if I should ever come back. I shall be telling', metadata={'source': '../roadnottaken.txt', 'start_index': 474}),\n",
304
+ " Document(page_content='I shall be telling this with a sigh\\rSomewhere ages and ages hence:\\rTwo roads diverged in a wood,', metadata={'source': '../roadnottaken.txt', 'start_index': 554}),\n",
305
+ " Document(page_content='diverged in a wood, and IэI took the one less traveled by,\\rAnd that has made all the difference.', metadata={'source': '../roadnottaken.txt', 'start_index': 631}),\n",
306
+ " Document(page_content='Two roads diverged in a yellow wood,\\rAnd sorry I could not travel both\\rAnd be one traveler, long I', metadata={'source': '../roadnottaken.txt', 'start_index': 0}),\n",
307
+ " Document(page_content='traveler, long I stood\\rAnd looked down one as far as I could\\rTo where it bent in the', metadata={'source': '../roadnottaken.txt', 'start_index': 82})]"
308
+ ]
309
+ },
310
+ "execution_count": null,
311
+ "metadata": {},
312
+ "output_type": "execute_result"
313
+ }
314
+ ],
315
+ "source": [
316
+ "# ensure basic behavior\n",
317
+ "res = files_to_text(['../roadnottaken.txt', '../roadnottaken.txt'], chunk_size=100, chunk_overlap=20)\n",
318
+ "res[:11]"
319
+ ]
320
+ },
321
+ {
322
+ "cell_type": "code",
323
+ "execution_count": null,
324
+ "metadata": {},
325
+ "outputs": [],
326
+ "source": [
327
+ "test_converters_inputs(files_to_text, '../roadnottaken.txt')"
328
+ ]
329
+ },
330
+ {
331
+ "cell_type": "markdown",
332
+ "metadata": {},
333
+ "source": [
334
+ "### Youtube\n",
335
+ "This works by first transcribing the video to text."
336
+ ]
337
+ },
338
+ {
339
+ "cell_type": "code",
340
+ "execution_count": null,
341
+ "metadata": {},
342
+ "outputs": [],
343
+ "source": [
344
+ "#| export\n",
345
+ "def youtube_to_text(urls, save_dir = \"content\"):\n",
346
+ " # Transcribe the videos to text\n",
347
+ " # save_dir: directory to save audio files\n",
348
+ "\n",
349
+ " if not isinstance(urls, list):\n",
350
+ " urls = [urls]\n",
351
+ " \n",
352
+ " youtube_loader = GenericLoader(YoutubeAudioLoader(urls, save_dir), OpenAIWhisperParser())\n",
353
+ " youtube_docs = youtube_loader.load()\n",
354
+ " \n",
355
+ " return youtube_docs"
356
+ ]
357
+ },
358
+ {
359
+ "cell_type": "markdown",
360
+ "metadata": {},
361
+ "source": [
362
+ "Now, let's demonstrate functionality using some existing YouTube videos"
363
+ ]
364
+ },
365
+ {
366
+ "cell_type": "code",
367
+ "execution_count": null,
368
+ "metadata": {},
369
+ "outputs": [],
370
+ "source": [
371
+ "# Two Karpathy lecture videos\n",
372
+ "urls = [\"https://youtu.be/kCc8FmEb1nY\", \"https://youtu.be/VMj-3S1tku0\"]\n",
373
+ "youtube_text = youtube_to_text(urls)\n",
374
+ "youtube_text"
375
+ ]
376
+ },
377
+ {
378
+ "cell_type": "markdown",
379
+ "metadata": {},
380
+ "source": [
381
+ "Other Youtube helper functions to help with getting full features of YouTube videos are included below. These two grab and save the text of the transcripts.\n",
382
+ "\n",
383
+ "<p style=\"color:red\"><strong>Note that in this stage of development, the following cannot be tested due to YouTube download errors.</strong></p>"
384
+ ]
385
+ },
386
+ {
387
+ "cell_type": "code",
388
+ "execution_count": null,
389
+ "metadata": {},
390
+ "outputs": [],
391
+ "source": [
392
+ "#| export\n",
393
+ "def save_text(text, text_name = None):\n",
394
+ " if not text_name:\n",
395
+ " text_name = text[:20]\n",
396
+ " text_path = os.path.join(\"/content\",text_name+\".txt\")\n",
397
+ " \n",
398
+ " with open(text_path, \"x\") as f:\n",
399
+ " f.write(text)\n",
400
+ " # Return the location at which the transcript is saved\n",
401
+ " return text_path"
402
+ ]
403
+ },
404
+ {
405
+ "cell_type": "code",
406
+ "execution_count": null,
407
+ "metadata": {},
408
+ "outputs": [],
409
+ "source": [
410
+ "#| export\n",
411
+ "def get_youtube_transcript(yt_url, save_transcript = False, temp_audio_dir = \"sample_data\"):\n",
412
+ " # Transcribe the videos to text and save to file in /content\n",
413
+ " # save_dir: directory to save audio files\n",
414
+ "\n",
415
+ " youtube_docs = youtube_to_text(yt_url, save_dir = temp_audio_dir)\n",
416
+ " \n",
417
+ " # Combine doc\n",
418
+ " combined_docs = [doc.page_content for doc in youtube_docs]\n",
419
+ " combined_text = \" \".join(combined_docs)\n",
420
+ " \n",
421
+ " # Save text to file\n",
422
+ " video_path = youtube_docs[0].metadata[\"source\"]\n",
423
+ " youtube_name = os.path.splitext(os.path.basename(video_path))[0]\n",
424
+ "\n",
425
+ " save_path = None\n",
426
+ " if save_transcript:\n",
427
+ " save_path = save_text(combined_text, youtube_name)\n",
428
+ " \n",
429
+ " return youtube_docs, save_path"
430
+ ]
431
+ },
432
+ {
433
+ "cell_type": "markdown",
434
+ "metadata": {},
435
+ "source": [
436
+ "### Websites\n",
437
+ "We have a few different approaches to reading website text. Some approaches are specifically provided through langchain and some are other packages that seem to be performant. We'll show the pros/cons of each approach below.\n",
438
+ "\n",
439
+ "#### Langchain: WebBaseLoader"
440
+ ]
441
+ },
442
+ {
443
+ "cell_type": "code",
444
+ "execution_count": null,
445
+ "metadata": {},
446
+ "outputs": [],
447
+ "source": [
448
+ "#| export\n",
449
+ "def website_to_text_web(url, chunk_size = 1500, chunk_overlap=100):\n",
450
+ " \n",
451
+ " # Url can be a single string or list\n",
452
+ " website_loader = WebBaseLoader(url)\n",
453
+ " website_raw = website_loader.load()\n",
454
+ "\n",
455
+ " website_data = rawtext_to_doc_split(website_raw, chunk_size = chunk_size, chunk_overlap=chunk_overlap)\n",
456
+ " \n",
457
+ " # Combine doc\n",
458
+ " return website_data"
459
+ ]
460
+ },
461
+ {
462
+ "cell_type": "markdown",
463
+ "metadata": {},
464
+ "source": [
465
+ "Now for a quick test to ensure functionality..."
466
+ ]
467
+ },
468
+ {
469
+ "cell_type": "code",
470
+ "execution_count": null,
471
+ "metadata": {},
472
+ "outputs": [],
473
+ "source": [
474
+ "demo_urls = [\"https://www.espn.com/\", \"https://www.vanderbilt.edu/undergrad-datascience/faq\"]"
475
+ ]
476
+ },
477
+ {
478
+ "cell_type": "code",
479
+ "execution_count": null,
480
+ "metadata": {},
481
+ "outputs": [
482
+ {
483
+ "data": {
484
+ "text/plain": [
485
+ "[Document(page_content=\"ESPN - Serving Sports Fans. Anytime. Anywhere.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n Skip to main content\\n \\n\\n Skip to navigation\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n<\\n\\n>\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nMenuESPN\\n\\n\\nSearch\\n\\n\\n\\nscores\\n\\n\\n\\nNFLMLBNBANHLSoccerGolf…Women's World CupNCAAFNCAAMNCAAWSports BettingBoxingCFLNCAACricketF1HorseMMANASCARNBA G LeagueOlympic SportsPLLRacingRN BBRN FBRugbyTennisWNBAWWEX GamesXFLMore ESPNFantasyListenWatchESPN+\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\nSUBSCRIBE NOW\\n\\n\\n\\n\\n\\nPaul vs. Diaz (ESPN+ PPV)\\n\\n\\n\\n\\n\\n\\n\\nPGA TOUR LIVE\\n\\n\\n\\n\\n\\n\\n\\nLittle League Baseball: Regionals\\n\\n\\n\\n\\n\\n\\n\\nMLB: Select Games\\n\\n\\n\\n\\n\\n\\n\\nCrossFit Games\\n\\n\\n\\n\\n\\n\\n\\nSlamBall\\n\\n\\n\\n\\n\\n\\n\\nThe Ultimate Fighter: Season 31\\n\\n\\n\\n\\n\\n\\n\\nFantasy Football: Top Storylines, Rookies, Sleepers\\n\\n\\nQuick Links\\n\\n\\n\\n\\nWomen's World Cup\\n\\n\\n\\n\\n\\n\\n\\nNHL Free Agency\\n\\n\\n\\n\\n\\n\\n\\nNBA Free Agency Buzz\\n\\n\\n\\n\\n\\n\\n\\nNBA Trade Machine\\n\\n\\n\\n\\n\\n\\n\\nThe Basketball Tournament\\n\\n\\n\\n\\n\\n\\n\\nFantasy Football: Sign Up\\n\\n\\n\\n\\n\\n\\n\\nHow To Watch PGA TOUR\\n\\n\\n\\n\\n\\n\\nFavorites\\n\\n\\n\\n\\n\\n\\n Manage Favorites\\n \\n\\n\\n\\nCustomize ESPNSign UpLog InESPN Sites\\n\\n\\n\\n\\nESPN Deportes\\n\\n\\n\\n\\n\\n\\n\\nAndscape\\n\\n\\n\\n\\n\\n\\n\\nespnW\\n\\n\\n\\n\\n\\n\\n\\nESPNFC\\n\\n\\n\\n\\n\\n\\n\\nX Games\\n\\n\\n\\n\\n\\n\\n\\nSEC Network\\n\\n\\nESPN Apps\\n\\n\\n\\n\\nESPN\\n\\n\\n\\n\\n\\n\\n\\nESPN Fantasy\\n\\n\\nFollow ESPN\\n\\n\\n\\n\\nFacebook\\n\\n\\n\\n\\n\\n\\n\\nX/Twitter\\n\\n\\n\\n\\n\\n\\n\\nInstagram\\n\\n\\n\\n\\n\\n\\n\\nSnapchat\\n\\n\\n\\n\\n\\n\\n\\nTikTok\\n\\n\\n\\n\\n\\n\\n\\nYouTube\", metadata={'source': 'https://www.espn.com/', 'title': 'ESPN - Serving Sports Fans. Anytime. Anywhere.', 'description': 'Visit ESPN for live scores, highlights and sports news. Stream exclusive games on ESPN+ and play fantasy sports.', 'language': 'en'}),\n",
486
+ " Document(page_content=\"How can your team win the national title? Connelly breaks down what needs to go right for all 17 contendersThe fewer things that have to go right to win a title, the better a team's chances of taking the crown. Here's what has to fall each contender's way.7hBill ConnellyDale Zanine/USA TODAY SportsPosition U 2023: Is USC on the verge of taking over QBU from Oklahoma?Which schools produce the most talent at each position?1dDavid HaleConnelly's conference previews: Intel on all 133 FBS teamsTOP HEADLINESFreeze 'uncomfortable' as Auburn opens campTexans' Metchie relied on faith amid cancer fightHornets have new owners after MJ sale finalizedMiami coach expects rough treatment of MessiDrexel basketball player found dead in apartmentGermany exits WWC after draw with South KoreaBrady takes minority stake in English soccer teamDeep dish: Cubs' output at plate best since 1897Re-drafting 2018 NFL class 5 years laterWHAT HAPPENED IN INDY?Inside the shocking feud between Jonathan Taylor and the ColtsHe was the NFL's leading rusher two seasons ago and wanted an extension with the Colts, but now he wants out. How things got so bad for Taylor and Indianapolis.8hStephen HolderZach Bolinger/Icon Sportswire'THE BEST IN THE WORLD RIGHT NOW'Why Stephen A. is convinced Tyreek Hill is the NFL's top WR2h2:57WYNDHAM CHAMPIONSHIPCONTINUES THROUGH SUNDAYShane Lowry fluffs shot, drains birdie chip immediately after3h0:35Countdown to FedEx Cup Playoffs, AIG Open and the Ryder CupDiana Taurasi, 10,000\", metadata={'source': 'https://www.espn.com/', 'title': 'ESPN - Serving Sports Fans. Anytime. Anywhere.', 'description': 'Visit ESPN for live scores, highlights and sports news. Stream exclusive games on ESPN+ and play fantasy sports.', 'language': 'en'}),\n",
487
+ " Document(page_content=\"Lowry fluffs shot, drains birdie chip immediately after3h0:35Countdown to FedEx Cup Playoffs, AIG Open and the Ryder CupDiana Taurasi, 10,000 points and the shot that made WNBA scoring history1dMLB SCOREBOARDTHURSDAY'S GAMESSee AllTrivia: Can you guess the right player?HERE COMES HELPBring on the reinforcements! 10 returning players as good as a trade deadline blockbusterInjured stars expected to come off the IL soon -- or have already -- could rock MLB's playoff races.7hAlden GonzalezJay Biggerstaff-USA TODAY Sports'CLEARLY THE ACC IS STRUGGLING'Finebaum: FSU is better off leaving the ACC5h1:04Thamel's realignment buzz: Latest on Pac-12, Big 12 and ACCAN AGGRESSIVE STRATEGYHow the Big 12 landed Colorado and shook up college footballThe Big 12 learned lessons two years ago after getting burned by Texas and Oklahoma. It resulted in a more aggressive strategy that could dramatically change the sport.2dHeather DinichRaymond Carlin/Icon Sportswire Top HeadlinesFreeze 'uncomfortable' as Auburn opens campTexans' Metchie relied on faith amid cancer fightHornets have new owners after MJ sale finalizedMiami coach expects rough treatment of MessiDrexel basketball player found dead in apartmentGermany exits WWC after draw with South KoreaBrady takes minority stake in English soccer teamDeep dish: Cubs' output at plate best since 1897Re-drafting 2018 NFL class 5 years laterFavorites FantasyManage FavoritesFantasy HomeCustomize ESPNSign UpLog InICYMI0:54Serena Williams, Alexis Ohanian\", metadata={'source': 'https://www.espn.com/', 'title': 'ESPN - Serving Sports Fans. Anytime. Anywhere.', 'description': 'Visit ESPN for live scores, highlights and sports news. Stream exclusive games on ESPN+ and play fantasy sports.', 'language': 'en'}),\n",
488
+ " Document(page_content='2018 NFL class 5 years laterFavorites FantasyManage FavoritesFantasy HomeCustomize ESPNSign UpLog InICYMI0:54Serena Williams, Alexis Ohanian use drones to reveal gender of 2nd childSerena Williams and her husband Alexis Ohanian find out the gender of their second child in a spectacular display of drones. Best of ESPN+Todd Kirkland/Getty ImagesMLB 2023 trade deadline: Winners, losers and in-betweenersThe 2023 trade deadline is over! Who crushed it, and who left much to be desired? We weigh in on all 30 clubs.AP Photo/Matt YorkLowe: Why Bradley Beal could unlock KD, Book and the most dangerous version of the Phoenix Suns yetWith Kevin Durant, Devin Booker and Beal, Phoenix is already an inner-circle title contender. But if the Suns continue a Beal experiment the Wizards ran last season? Good luck.Cliff Welch/Icon SportswirePredicting 10 NFL starting quarterback battles: Who is QB1?We talked to people around the NFL and projected the QB1 for 10 unsettled situations, including a wide-open race in Tampa Bay. Trending NowAP Photo/Julio Cortez\\'Revis Island\\' resonates long after Hall of Famer\\'s retirementDarrelle Revis made his name as a dominant corner but might be best known for his \"island\" moniker players still adopt today.Illustration by ESPNThe wild life of Gardner MinshewFour colleges, three NFL teams, two Manias and the hug that broke the internet. It\\'s been an unbelievable ride for Gardner Minshew. Next stop: Indianapolis.Illustration by ESPNBest 2023 Women\\'s World Cup', metadata={'source': 'https://www.espn.com/', 'title': 'ESPN - Serving Sports Fans. Anytime. Anywhere.', 'description': 'Visit ESPN for live scores, highlights and sports news. Stream exclusive games on ESPN+ and play fantasy sports.', 'language': 'en'}),\n",
489
+ " Document(page_content=\"that broke the internet. It's been an unbelievable ride for Gardner Minshew. Next stop: Indianapolis.Illustration by ESPNBest 2023 Women's World Cup players: Morgan, Caicedo, moreESPN's expert panel selected the top 25 players of the Women's World Cup to keep an eye on, from Sophia Smith to Sam Kerr and more. How to Watch on ESPN+(AP Photo/Koji Sasahara, File)How to watch the PGA Tour, Masters, PGA Championship and FedEx Cup playoffs on ESPN, ESPN+Here's everything you need to know about how to watch the PGA Tour, Masters, PGA Championship and FedEx Cup playoffs on ESPN and ESPN+. Sign up to play the #1 Fantasy game!Create A LeagueJoin Public LeagueReactivateMock Draft NowSign up for FREE!Create A LeagueJoin a Public LeagueReactivate a LeaguePractice With a Mock DraftSign up for FREE!Create A LeagueJoin a Public LeagueReactivate a LeaguePractice with a Mock Draft\", metadata={'source': 'https://www.espn.com/', 'title': 'ESPN - Serving Sports Fans. Anytime. Anywhere.', 'description': 'Visit ESPN for live scores, highlights and sports news. Stream exclusive games on ESPN+ and play fantasy sports.', 'language': 'en'}),\n",
490
+ " Document(page_content=\"ESPN+\\n\\n\\n\\n\\nPaul vs. Diaz (ESPN+ PPV)\\n\\n\\n\\n\\n\\n\\n\\nPGA TOUR LIVE\\n\\n\\n\\n\\n\\n\\n\\nLittle League Baseball: Regionals\\n\\n\\n\\n\\n\\n\\n\\nMLB: Select Games\\n\\n\\n\\n\\n\\n\\n\\nCrossFit Games\\n\\n\\n\\n\\n\\n\\n\\nSlamBall\\n\\n\\n\\n\\n\\n\\n\\nThe Ultimate Fighter: Season 31\\n\\n\\n\\n\\n\\n\\n\\nFantasy Football: Top Storylines, Rookies, Sleepers\\n\\n\\nQuick Links\\n\\n\\n\\n\\nWomen's World Cup\\n\\n\\n\\n\\n\\n\\n\\nNHL Free Agency\\n\\n\\n\\n\\n\\n\\n\\nNBA Free Agency Buzz\\n\\n\\n\\n\\n\\n\\n\\nNBA Trade Machine\\n\\n\\n\\n\\n\\n\\n\\nThe Basketball Tournament\\n\\n\\n\\n\\n\\n\\n\\nFantasy Football: Sign Up\\n\\n\\n\\n\\n\\n\\n\\nHow To Watch PGA TOUR\\n\\n\\nESPN Sites\\n\\n\\n\\n\\nESPN Deportes\\n\\n\\n\\n\\n\\n\\n\\nAndscape\\n\\n\\n\\n\\n\\n\\n\\nespnW\\n\\n\\n\\n\\n\\n\\n\\nESPNFC\\n\\n\\n\\n\\n\\n\\n\\nX Games\\n\\n\\n\\n\\n\\n\\n\\nSEC Network\\n\\n\\nESPN Apps\\n\\n\\n\\n\\nESPN\\n\\n\\n\\n\\n\\n\\n\\nESPN Fantasy\\n\\n\\nFollow ESPN\\n\\n\\n\\n\\nFacebook\\n\\n\\n\\n\\n\\n\\n\\nX/Twitter\\n\\n\\n\\n\\n\\n\\n\\nInstagram\\n\\n\\n\\n\\n\\n\\n\\nSnapchat\\n\\n\\n\\n\\n\\n\\n\\nTikTok\\n\\n\\n\\n\\n\\n\\n\\nYouTube\\n\\n\\nTerms of UsePrivacy PolicyYour US State Privacy RightsChildren's Online Privacy PolicyInterest-Based AdsAbout Nielsen MeasurementDo Not Sell or Share My Personal InformationContact UsDisney Ad Sales SiteWork for ESPNCopyright: © ESPN Enterprises, Inc. All rights reserved.\", metadata={'source': 'https://www.espn.com/', 'title': 'ESPN - Serving Sports Fans. Anytime. Anywhere.', 'description': 'Visit ESPN for live scores, highlights and sports news. Stream exclusive games on ESPN+ and play fantasy sports.', 'language': 'en'}),\n",
491
+ " Document(page_content='Frequently Asked Questions | Undergraduate Data Science | Vanderbilt University\\n\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nSkip to main content\\n\\nlink\\n\\n\\n\\n\\n\\nHome\\nPeople\\nMinor\\n\\nMinor Requirements\\nCourse Descriptions\\nCourse Schedule\\nHow to Declare the Minor\\nChoosing a Minor\\n\\n\\nResearch and Immersion\\n\\nResearch and Immersion Overview\\nDS 3850 Research in Data Science\\nDSI Summer Research Program\\nData Science for Social Good\\nResearch Immersion in Data Science\\nDSI Internship\\n\\n\\nFAQ\\nNews\\nForms\\nContact and Email List\\nData Science Institute\\n \\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\tUndergraduate Data Science \\n\\n\\n\\n\\n\\n\\n\\n\\n\\nFrequently Asked Questions\\nDeclaring the Minor\\n\\n\\n\\nHow do I declare the Data Science Minor?Use the forms and follow the procedures for your home college. See How to Declare the Data Science Minor.\\n\\n\\nWhen should I declare the Data Science Minor?While minor declarations can be made any time, DS courses will give some preference to students who have officially declared the Data Science Minor. So we recommend declaring the minor sooner rather than later. It is always possible to drop a declared minor. Minor declarations must be submitted at least two weeks before registration begins. Otherwise, the minor declaration will not be processed until after registration. No preference will be given during registration for an “intent” to declare because the minor declaration was made too late.', metadata={'source': 'https://www.vanderbilt.edu/undergrad-datascience/faq', 'title': 'Frequently Asked Questions | Undergraduate Data Science | Vanderbilt University', 'description': 'Frequently Asked Questions. ', 'language': 'en'}),\n",
492
+ " Document(page_content='I declared the Data Science Minor, but I did not get into the class I wanted to take for the minor. Why?First, preference for students who have declared the minor only applies to DS courses, not other courses. Second, if you declared the minor within two weeks of registration, your minor declaration will. not show up on YES, and you will not have preference. Third, while we try to hold as many seats for students who have declared the minor as we can, not all seats are reserved.\\n\\n\\nI am a first-year A&S student. Can I really declare the Data Science Minor now?Yes. While A&S students are usually prevented from declaring a major or minor until sophomore year, first-year A&S students can declare the Data Science Minor. As noted in the previous question, this can be important to do since some popular core DS courses will give some preference to students who have officially declared Data Science as a minor.', metadata={'source': 'https://www.vanderbilt.edu/undergrad-datascience/faq', 'title': 'Frequently Asked Questions | Undergraduate Data Science | Vanderbilt University', 'description': 'Frequently Asked Questions. ', 'language': 'en'}),\n",
493
+ " Document(page_content='I am a current junior (rising senior), can I complete the Data Science Minor (for Spring 2021 juniors only)?Juniors must contact the Director of Undergraduate Data Science to discuss options. DS 1000 is not open to current juniors (rising seniors). DS 3100 will not be taught next year (Fall 2021 or Spring 2022) and will need to be suitably replaced, which will require an approved plan from the Director. Furthermore, while DS / CS 3262 is current slated to be taught Spring 2022, that is not fully guaranteed, so students should see if they can take one of the other machine learning options.\\n\\n\\nI am a rising senior or current senior and cannot register for DS 1000. Why?Rising seniors and current seniors can only register for DS 1000 if there are available seats immediately before the semester begins with permission of the instructor. DS 1000 is intended as an introduction to data science for first years and sophomores, which is why this restriction is in place.\\n\\n\\nCollege-Specific Information\\n\\n\\n\\nWhat college is the home of the Data Science Minor?The Data Science Minor is a trans-institutional minor, shared by A&S, Blair, Engineering, and Peabody.', metadata={'source': 'https://www.vanderbilt.edu/undergrad-datascience/faq', 'title': 'Frequently Asked Questions | Undergraduate Data Science | Vanderbilt University', 'description': 'Frequently Asked Questions. ', 'language': 'en'}),\n",
494
+ " Document(page_content='I am an A&S student. Do DS courses count as A&S courses?All courses with a DS prefix count as courses within each of the colleges, including A&S. If you are an A&S student, and are taking a course that is cross-listed, make sure you enroll in the one with the DS prefix. Electives outside of A&S without the DS prefix will generally not count as A&S courses, so plan accordingly.\\n\\n\\nWhat are the unique credit hour rules for the Data Science Minor?Students electing an undergraduate minor in Data Science must follow academic regulations regarding minors in their home college, including but not limited to regulations regarding unique hours. The unique credit hour rule is specific to the College of Arts and Science and Peabody College. The School of Engineering and Blair School of Music do not have a unique credit hour rule. The Data Science minor cannot waive this rule. Please talk with your academic advisor about how to satisfy these requirements.\\n\\n\\nInfo About the Courses\\n\\n\\n\\nDS 1000Thank you for your interest in DS 1000! The course is full for the fall 2021 semester. Due to student demand and the transinstitutional nature of the course, we cannot make special exceptions as to which students, if any, on the waitlist are able to enroll. DS 1000 will be offered again in the spring semester.', metadata={'source': 'https://www.vanderbilt.edu/undergrad-datascience/faq', 'title': 'Frequently Asked Questions | Undergraduate Data Science | Vanderbilt University', 'description': 'Frequently Asked Questions. ', 'language': 'en'}),\n",
495
+ " Document(page_content='What computer programming course should I take?See What Programming Course To Take? In general, students interested in data science and scientific computing (not in computer science per se) should learn Python (and R).\\n\\n\\nHow do I find courses approved for the data science minor on YES?On YES, to select all courses approved for credit in the Data Science minor offered in a given semester, select the “Advanced” link next to the search box, select the “Class Attributes” drop-down box on the bottom right of the advanced search page, and then select “Eligible for Data Science” to find all courses. (Note that these course tags will not all be in place on YES until the registration period for Fall 2021 begins.)\\n\\n\\nCan other courses, besides those listed, count towards the Data Science Minor?New courses, special topics courses, or graduate-level courses that seem related to data science could count as electives. Contact the Director of Undergraduate Data Science to request consideration.', metadata={'source': 'https://www.vanderbilt.edu/undergrad-datascience/faq', 'title': 'Frequently Asked Questions | Undergraduate Data Science | Vanderbilt University', 'description': 'Frequently Asked Questions. ', 'language': 'en'}),\n",
496
+ " Document(page_content='Why doesn’t CS 1104 count towards the Data Science Minor?It does, as a prerequisite to CS 2204, which counts towards the minor. CS / DS 1100 was created as a new single-semester programming course for the Data Science Minor. It roughly has 2/3 the content of CS 1104 and 1/3 the content of CS 2204. While CS / DS 1100 counts as a single semester of programming for the minor, we strongly encourage students interested in data science, and in using data science tools and techniques, to take two semesters of programming in Python (CS / DS 1100 or CS 1104, followed by CS 2204). If you have taken CS 1104, you can take CS 1100, but you will only receive a total of four credits for\\xa0the two courses. See also What Programming Course To Take?', metadata={'source': 'https://www.vanderbilt.edu/undergrad-datascience/faq', 'title': 'Frequently Asked Questions | Undergraduate Data Science | Vanderbilt University', 'description': 'Frequently Asked Questions. ', 'language': 'en'}),\n",
497
+ " Document(page_content='I see that after having taken CS 1104, I can take CS/DS 1100 instead of taking CS 2204. What are the downsides of doing so?After taking CS 1104, we do recommend you take CS 2204. If you are interested in data science, a broader experience in Python in desirable (in fact, we recommend that students having taken CS 1100 try to take CS 2204 as well). CS/DS 1100 and 1104 have significant overlap (both are introductions to programming using Python). That said, it is permissible to take CS/DS 1100 after having taken CS 1104. You will only get 1 (out of 3) credit hours for CS/DS 1100 (after having taken CS 1104), but the combination of CS/DS 1100 and 1104 will satisfy the DS minor programming requirement. Note that if you enroll in three 3-hour courses and CS/DS 1100 (after having taken CS 1104) it will look like you are registered for 12 credit hours during registration and at the start of the semester, but your credit hours will be reduced to only 10 credit hours (because the credits for CS/DS 1100 will be cut back to 1 after the add/drop period). Enrolling in fewer than 12 credit hours can have significant consequences on financial aid and potentially on visa status for international students. Please be mindful of this.\\n\\n\\nWhat is the difference between CS 1100 and DS 1100?Nothing. They are the same course. They meet the same time in the same place and are taught by the same instructor. They are just cross-listed.', metadata={'source': 'https://www.vanderbilt.edu/undergrad-datascience/faq', 'title': 'Frequently Asked Questions | Undergraduate Data Science | Vanderbilt University', 'description': 'Frequently Asked Questions. ', 'language': 'en'}),\n",
498
+ " Document(page_content='I have taken CS 1101. What computer programming course should I take next?You have two options. You can either take CS 2201 (in C++) or take CS 1100 (in Python). Of course, you could also take CS 1104 and 2204 (in Python). CS 1100, 2201, and 2204 all satisfy the programming requirement for the minor. Note that CS 2201 is a prerequisite for many upper-level CS courses (as well as required for the CS major and minor). For more information, see What Programming Course To Take?\\n\\n\\nECON 3750 and MATH 3670 are listed both as satisfying the core machine learning requirement and as electives. If I take one, will it double-count for both requirements?No. They are listed under both because a student who takes one of the other machine learning\\xa0courses to satisfy the core requirement (CS/DS 3262 or CS 4262) can also take ECON 3750 or MATH 3670 as an elective; the content is sufficiently different that both can count towards the minor, but one course cannot double-count for two minor requirements.', metadata={'source': 'https://www.vanderbilt.edu/undergrad-datascience/faq', 'title': 'Frequently Asked Questions | Undergraduate Data Science | Vanderbilt University', 'description': 'Frequently Asked Questions. ', 'language': 'en'}),\n",
499
+ " Document(page_content='Can I take ECON 3750 or MATH 3670 as an elective if I have already taken CS 3262 or CS 4262?Yes (see above). ECON 3750 and MATH 3670 are sufficiently different from CS 3262 or CS 4262 (and from each other) that you can take these as electives. In fact, you could take ECON 3750 to satisfy the machine learning requirement and then take MATH 3670 as an elective.\\nCS 3262 can count towards the Data Science minor. CS 3262 does not count directly towards the Computer Science major requirements but could be used as either a tech elective or open elective for Computer Science majors.\\n\\n\\nWhy doesn’t MATH 2820 count towards the Data Science Minor?It does, as a prerequisite to MATH 2821, which counts towards the minor. The two-course sequence of MATH 2820 and MATH 2821 counts towards the Data Science Minor; the\\xa0two-course sequence is required because MATH 2820 goes deep into mathematical foundations of probability ad statistics concepts, but does not by itself cover the breadth of topics of other introductory statistics courses. This two-course sequence provides an excellent introduction to mathematical statistics.\\n\\n\\nResearch and Immersion Information\\n\\n\\n\\nCan I do research for course credit?Yes, you can do research for course credit (including DS 3850). More information can be found here: https://www.vanderbilt.edu/undergrad-datascience/ds-3850-research-in-data-science/', metadata={'source': 'https://www.vanderbilt.edu/undergrad-datascience/faq', 'title': 'Frequently Asked Questions | Undergraduate Data Science | Vanderbilt University', 'description': 'Frequently Asked Questions. ', 'language': 'en'}),\n",
500
+ " Document(page_content='I am interested in the Undergraduate Data Science Immersion Program. How can I participate.Some competitive summer immersion programs include DSI-SPR and Data Science for Social Good (DSSG). More information can be found on the following websites.\\n\\nhttps://www.vanderbilt.edu/datascience/academics/undergraduate/summer-research-program/\\nhttps://www.vanderbilt.edu/datascience/data-science-for-social-good/\\n\\nTo get involved in data-science-oriented research with a faculty member, you will need to reach out to the faculty member. Pointers can be found here: https://www.vanderbilt.edu/undergrad-datascience/research-and-immersion-overview/. Having that research count towards the immersion requirement will be between your faculty mentor and your faculty immersion coordinator.\\nAdditional information about research opportunities will be posted on the website in the future.\\n\\xa0\\n\\n\\nContact\\n\\n\\n\\nHow do I ask a question about the Data Science?If you have questions about the Data Science Minor or Immersion opportunities in data science, please email us: [email protected]\\n\\n\\nTo whom can I petition if the Director denies my request?The Governing Board of the Data Science Minor acts as the college-level oversight body for this trans-institutional minor and would be the appropriate next step for petitions related to the minor.\\n\\n\\n\\n\\n\\n\\n\\nData Science News\\n\\n\\n\\n Opportunities for Capstone Projects and Research Experience\\n\\n\\n\\n Attention Graduate Students! We’re Hiring!', metadata={'source': 'https://www.vanderbilt.edu/undergrad-datascience/faq', 'title': 'Frequently Asked Questions | Undergraduate Data Science | Vanderbilt University', 'description': 'Frequently Asked Questions. ', 'language': 'en'}),\n",
501
+ " Document(page_content='Data Science News\\n\\n\\n\\n Opportunities for Capstone Projects and Research Experience\\n\\n\\n\\n Attention Graduate Students! We’re Hiring!\\n\\n\\n\\n Vanderbilt student-athlete drives sports performance through data analysis\\n\\n\\n\\n New Course: DS 3891 Special Topics: Intro to Generative AI\\n\\n\\n\\n Now Accepting Applications: DS Minor Teaching Fellowship for graduate students\\n\\n\\n\\n Join Our Team: Student Worker Positions Available for Fall 2023 Semester!\\n\\n\\n\\n\\n\\nVIEW MORE EVENTS >\\n\\n\\n\\n\\nYour Vanderbilt\\n\\nAlumni\\nCurrent Students\\nFaculty & Staff\\nInternational Students\\nMedia\\nParents & Family\\nProspective Students\\nResearchers\\nSports Fans\\nVisitors & Neighbors\\n\\n\\n\\n\\n \\n\\n\\n\\nQuick Links\\n\\nPeopleFinder\\nLibraries\\nNews\\nCalendar\\nMaps\\nA-Z\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n©\\n Vanderbilt University · All rights reserved. Site Development: Digital Strategies (Division of Communications)\\nVanderbilt University is committed to principles of equal opportunity and affirmative action. Accessibility information. Vanderbilt®, Vanderbilt University®, V Oak Leaf Design®, Star V Design® and Anchor Down® are trademarks of The Vanderbilt University', metadata={'source': 'https://www.vanderbilt.edu/undergrad-datascience/faq', 'title': 'Frequently Asked Questions | Undergraduate Data Science | Vanderbilt University', 'description': 'Frequently Asked Questions. ', 'language': 'en'})]"
502
+ ]
503
+ },
504
+ "execution_count": null,
505
+ "metadata": {},
506
+ "output_type": "execute_result"
507
+ }
508
+ ],
509
+ "source": [
510
+ "# get the results\n",
511
+ "res_web = website_to_text_web(demo_urls)\n",
512
+ "\n",
513
+ "res_web"
514
+ ]
515
+ },
516
+ {
517
+ "cell_type": "code",
518
+ "execution_count": null,
519
+ "metadata": {},
520
+ "outputs": [],
521
+ "source": [
522
+ "#unit testbed\n",
523
+ "test_converters_inputs(website_to_text_web, demo_urls)"
524
+ ]
525
+ },
526
+ {
527
+ "cell_type": "markdown",
528
+ "metadata": {},
529
+ "source": [
530
+ "Something interesting that we notice here is the proliferation of new lines that aren't for the best.\n",
531
+ "\n",
532
+ "#### Langchain: UnstructuredURLLoader"
533
+ ]
534
+ },
535
+ {
536
+ "cell_type": "code",
537
+ "execution_count": null,
538
+ "metadata": {},
539
+ "outputs": [],
540
+ "source": [
541
+ "#| export\n",
542
+ "def website_to_text_unstructured(web_urls, chunk_size = 1500, chunk_overlap=100):\n",
543
+ "\n",
544
+ " # Make sure it's a list\n",
545
+ " if not isinstance(web_urls, list):\n",
546
+ " web_urls = [web_urls]\n",
547
+ " \n",
548
+ " # Url can be a single string or list\n",
549
+ " website_loader = UnstructuredURLLoader(web_urls)\n",
550
+ " website_raw = website_loader.load()\n",
551
+ "\n",
552
+ " website_data = rawtext_to_doc_split(website_raw, chunk_size = chunk_size, chunk_overlap=chunk_overlap)\n",
553
+ " \n",
554
+ " # Return individual docs or list\n",
555
+ " return website_data"
556
+ ]
557
+ },
558
+ {
559
+ "cell_type": "code",
560
+ "execution_count": null,
561
+ "metadata": {},
562
+ "outputs": [
563
+ {
564
+ "data": {
565
+ "text/plain": [
566
+ "[Document(page_content=\"Menu\\n\\nESPN\\n\\nSearch\\n\\n\\n\\nscores\\n\\nNFL\\n\\nMLB\\n\\nNBA\\n\\nNHL\\n\\nSoccer\\n\\nGolf\\n\\n…Women's World CupNCAAFNCAAMNCAAWSports BettingBoxingCFLNCAACricketF1HorseMMANASCARNBA G LeagueOlympic SportsPLLRacingRN BBRN FBRugbyTennisWNBAWWEX GamesXFL\\n\\nMore ESPN\\n\\nFantasy\\n\\nListen\\n\\nWatch\\n\\nESPN+\\n\\nSUBSCRIBE NOW\\n\\nPaul vs. Diaz (ESPN+ PPV)\\n\\nPGA TOUR LIVE\\n\\nLittle League Baseball: Regionals\\n\\nMLB: Select Games\\n\\nCrossFit Games\\n\\nSlamBall\\n\\nThe Ultimate Fighter: Season 31\\n\\nFantasy Football: Top Storylines, Rookies, Sleepers\\n\\nQuick Links\\n\\nWomen's World Cup\\n\\nNHL Free Agency\\n\\nNBA Free Agency Buzz\\n\\nNBA Trade Machine\\n\\nThe Basketball Tournament\\n\\nFantasy Football: Sign Up\\n\\nHow To Watch PGA TOUR\\n\\nFavorites\\n\\nManage Favorites\\n\\nCustomize ESPN\\n\\nESPN Sites\\n\\nESPN Deportes\\n\\nAndscape\\n\\nespnW\\n\\nESPNFC\\n\\nX Games\\n\\nSEC Network\\n\\nESPN Apps\\n\\nESPN\\n\\nESPN Fantasy\\n\\nFollow ESPN\\n\\nFacebook\\n\\nX/Twitter\\n\\nInstagram\\n\\nSnapchat\\n\\nTikTok\\n\\nYouTube\\n\\nHow can your team win the national title? Connelly breaks down what needs to go right for all 17 contendersThe fewer things that have to go right to win a title, the better a team's chances of taking the crown. Here's what has to fall each contender's way.7hBill ConnellyDale Zanine/USA TODAY Sports\\n\\nPosition U 2023: Is USC on the verge of taking over QBU from Oklahoma?Which schools produce the most talent at each position?1dDavid Hale\\n\\nConnelly's conference previews: Intel on all 133 FBS teams\\n\\nTOP HEADLINES\\n\\nFreeze 'uncomfortable' as Auburn opens camp\\n\\nTexans' Metchie relied on faith amid cancer fight\", metadata={'source': 'https://www.espn.com/'}),\n",
567
+ " Document(page_content=\"TOP HEADLINES\\n\\nFreeze 'uncomfortable' as Auburn opens camp\\n\\nTexans' Metchie relied on faith amid cancer fight\\n\\nHornets have new owners after MJ sale finalized\\n\\nMiami coach expects rough treatment of Messi\\n\\nDrexel basketball player found dead in apartment\\n\\nGermany exits WWC after draw with South Korea\\n\\nBrady takes minority stake in English soccer team\\n\\nDeep dish: Cubs' output at plate best since 1897\\n\\nRe-drafting 2018 NFL class 5 years later\\n\\nWHAT HAPPENED IN INDY?\\n\\nInside the shocking feud between Jonathan Taylor and the ColtsHe was the NFL's leading rusher two seasons ago and wanted an extension with the Colts, but now he wants out. How things got so bad for Taylor and Indianapolis.8hStephen HolderZach Bolinger/Icon Sportswire\\n\\n'THE BEST IN THE WORLD RIGHT NOW'\\n\\nWhy Stephen A. is convinced Tyreek Hill is the NFL's top WR\\n\\n2h\\n\\n2:57\\n\\nWYNDHAM CHAMPIONSHIP\\n\\nCONTINUES THROUGH SUNDAY\\n\\nShane Lowry fluffs shot, drains birdie chip immediately after\\n\\n4h\\n\\n0:35\\n\\nCountdown to FedEx Cup Playoffs, AIG Open and the Ryder Cup\\n\\nDiana Taurasi, 10,000 points and the shot that made WNBA scoring history1d\\n\\nMLB SCOREBOARDTHURSDAY'S GAMES\\n\\nSee All\\n\\nTrivia: Can you guess the right player?\\n\\nHERE COMES HELP\\n\\nBring on the reinforcements! 10 returning players as good as a trade deadline blockbusterInjured stars expected to come off the IL soon -- or have already -- could rock MLB's playoff races.7hAlden GonzalezJay Biggerstaff-USA TODAY Sports\\n\\n'CLEARLY THE ACC IS STRUGGLING'\", metadata={'source': 'https://www.espn.com/'}),\n",
568
+ " Document(page_content=\"'CLEARLY THE ACC IS STRUGGLING'\\n\\nFinebaum: FSU is better off leaving the ACC\\n\\n5h\\n\\n1:04\\n\\nThamel's realignment buzz: Latest on Pac-12, Big 12 and ACC\\n\\nAN AGGRESSIVE STRATEGY\\n\\nHow the Big 12 landed Colorado and shook up college footballThe Big 12 learned lessons two years ago after getting burned by Texas and Oklahoma. It resulted in a more aggressive strategy that could dramatically change the sport.2dHeather DinichRaymond Carlin/Icon Sportswire\\n\\nTop Headlines\\n\\nFreeze 'uncomfortable' as Auburn opens camp\\n\\nTexans' Metchie relied on faith amid cancer fight\\n\\nHornets have new owners after MJ sale finalized\\n\\nMiami coach expects rough treatment of Messi\\n\\nDrexel basketball player found dead in apartment\\n\\nGermany exits WWC after draw with South Korea\\n\\nBrady takes minority stake in English soccer team\\n\\nDeep dish: Cubs' output at plate best since 1897\\n\\nRe-drafting 2018 NFL class 5 years later\\n\\nFavorites\\n\\nFantasy\\n\\nManage Favorites\\n\\nFantasy Home\\n\\nCustomize ESPN\\n\\nICYMI\\n\\n0:54\\n\\nSerena Williams, Alexis Ohanian use drones to reveal gender of 2nd childSerena Williams and her husband Alexis Ohanian find out the gender of their second child in a spectacular display of drones.\\n\\nBest of ESPN+\\n\\nTodd Kirkland/Getty Images\\n\\nMLB 2023 trade deadline: Winners, losers and in-betweenersThe 2023 trade deadline is over! Who crushed it, and who left much to be desired? We weigh in on all 30 clubs.\\n\\nAP Photo/Matt York\", metadata={'source': 'https://www.espn.com/'}),\n",
569
+ " Document(page_content='AP Photo/Matt York\\n\\nLowe: Why Bradley Beal could unlock KD, Book and the most dangerous version of the Phoenix Suns yetWith Kevin Durant, Devin Booker and Beal, Phoenix is already an inner-circle title contender. But if the Suns continue a Beal experiment the Wizards ran last season? Good luck.\\n\\nCliff Welch/Icon Sportswire\\n\\nPredicting 10 NFL starting quarterback battles: Who is QB1?We talked to people around the NFL and projected the QB1 for 10 unsettled situations, including a wide-open race in Tampa Bay.\\n\\nTrending Now\\n\\nAP Photo/Julio Cortez\\n\\n\\'Revis Island\\' resonates long after Hall of Famer\\'s retirementDarrelle Revis made his name as a dominant corner but might be best known for his \"island\" moniker players still adopt today.\\n\\nIllustration by ESPN\\n\\nThe wild life of Gardner MinshewFour colleges, three NFL teams, two Manias and the hug that broke the internet. It\\'s been an unbelievable ride for Gardner Minshew. Next stop: Indianapolis.\\n\\nIllustration by ESPN\\n\\nBest 2023 Women\\'s World Cup players: Morgan, Caicedo, moreESPN\\'s expert panel selected the top 25 players of the Women\\'s World Cup to keep an eye on, from Sophia Smith to Sam Kerr and more.\\n\\nHow to Watch on ESPN+\\n\\n(AP Photo/Koji Sasahara, File)\\n\\nHow to watch the PGA Tour, Masters, PGA Championship and FedEx Cup playoffs on ESPN, ESPN+Here\\'s everything you need to know about how to watch the PGA Tour, Masters, PGA Championship and FedEx Cup playoffs on ESPN and ESPN+.\\n\\nSign up to play the #1 Fantasy game!', metadata={'source': 'https://www.espn.com/'}),\n",
570
+ " Document(page_content=\"Sign up to play the #1 Fantasy game!\\n\\nCreate A League\\n\\nJoin Public League\\n\\nReactivate\\n\\nMock Draft Now\\n\\nSign up for FREE!\\n\\nCreate A League\\n\\nJoin a Public League\\n\\nReactivate a League\\n\\nPractice With a Mock Draft\\n\\nSign up for FREE!\\n\\nCreate A League\\n\\nJoin a Public League\\n\\nReactivate a League\\n\\nPractice with a Mock Draft\\n\\nESPN+\\n\\nWatch Now\\n\\nPaul vs. Diaz (ESPN+ PPV)\\n\\nPGA TOUR LIVE\\n\\nLittle League Baseball: Regionals\\n\\nMLB: Select Games\\n\\nCrossFit Games\\n\\nSlamBall\\n\\nThe Ultimate Fighter: Season 31\\n\\nFantasy Football: Top Storylines, Rookies, Sleepers\\n\\nQuick Links\\n\\nWomen's World Cup\\n\\nNHL Free Agency\\n\\nNBA Free Agency Buzz\\n\\nNBA Trade Machine\\n\\nThe Basketball Tournament\\n\\nFantasy Football: Sign Up\\n\\nHow To Watch PGA TOUR\\n\\nESPN Sites\\n\\nESPN Deportes\\n\\nAndscape\\n\\nespnW\\n\\nESPNFC\\n\\nX Games\\n\\nSEC Network\\n\\nESPN Apps\\n\\nESPN\\n\\nESPN Fantasy\\n\\nFollow ESPN\\n\\nFacebook\\n\\nX/Twitter\\n\\nInstagram\\n\\nSnapchat\\n\\nTikTok\\n\\nYouTube\\n\\nTerms of Use\\n\\nPrivacy Policy\\n\\nYour US State Privacy Rights\\n\\nChildren's Online Privacy Policy\\n\\nInterest-Based Ads\\n\\nAbout Nielsen Measurement\\n\\nDo Not Sell or Share My Personal Information\\n\\nContact Us\\n\\nDisney Ad Sales Site\\n\\nWork for ESPN\", metadata={'source': 'https://www.espn.com/'}),\n",
571
+ " Document(page_content='Skip to main content\\n\\nlink\\n\\nHome\\n\\nPeople\\n\\nMinor\\n\\n\\tMinor Requirements\\n\\tCourse Descriptions\\n\\tCourse Schedule\\n\\tHow to Declare the Minor\\n\\tChoosing a Minor\\n\\nResearch and Immersion\\n\\n\\tResearch and Immersion Overview\\n\\tDS 3850 Research in Data Science\\n\\tDSI Summer Research Program\\n\\tData Science for Social Good\\n\\tResearch Immersion in Data Science\\n\\tDSI Internship\\n\\nFAQ\\n\\nNews\\n\\nForms\\n\\nContact and Email List\\n\\nData Science Institute\\n\\nUndergraduate Data Science\\n\\nFrequently Asked Questions\\n\\nDeclaring the Minor\\n\\nHow do I declare the Data Science Minor?\\n\\nUse the forms and follow the procedures for your home college. See How to Declare the Data Science Minor.\\n\\nWhen should I declare the Data Science Minor?\\n\\nWhile minor declarations can be made any time, DS courses will give some preference to students who have officially declared the Data Science Minor. So we recommend declaring the minor sooner rather than later. It is always possible to drop a declared minor. Minor declarations must be submitted at least two weeks before registration begins. Otherwise, the minor declaration will not be processed until after registration. No preference will be given during registration for an “intent” to declare because the minor declaration was made too late.\\n\\nI declared the Data Science Minor, but I did not get into the class I wanted to take for the minor. Why?', metadata={'source': 'https://www.vanderbilt.edu/undergrad-datascience/faq'}),\n",
572
+ " Document(page_content='I declared the Data Science Minor, but I did not get into the class I wanted to take for the minor. Why?\\n\\nFirst, preference for students who have declared the minor only applies to DS courses, not other courses. Second, if you declared the minor within two weeks of registration, your minor declaration will. not show up on YES, and you will not have preference. Third, while we try to hold as many seats for students who have declared the minor as we can, not all seats are reserved.\\n\\nI am a first-year A&S student. Can I really declare the Data Science Minor now?\\n\\nYes. While A&S students are usually prevented from declaring a major or minor until sophomore year, first-year A&S students can declare the Data Science Minor. As noted in the previous question, this can be important to do since some popular core DS courses will give some preference to students who have officially declared Data Science as a minor.\\n\\nI am a current junior (rising senior), can I complete the Data Science Minor (for Spring 2021 juniors only)?', metadata={'source': 'https://www.vanderbilt.edu/undergrad-datascience/faq'}),\n",
573
+ " Document(page_content='I am a current junior (rising senior), can I complete the Data Science Minor (for Spring 2021 juniors only)?\\n\\nJuniors must contact the Director of Undergraduate Data Science to discuss options. DS 1000 is not open to current juniors (rising seniors). DS 3100 will not be taught next year (Fall 2021 or Spring 2022) and will need to be suitably replaced, which will require an approved plan from the Director. Furthermore, while DS / CS 3262 is current slated to be taught Spring 2022, that is not fully guaranteed, so students should see if they can take one of the other machine learning options.\\n\\nI am a rising senior or current senior and cannot register for DS 1000. Why?\\n\\nRising seniors and current seniors can only register for DS 1000 if there are available seats immediately before the semester begins with permission of the instructor. DS 1000 is intended as an introduction to data science for first years and sophomores, which is why this restriction is in place.\\n\\nCollege-Specific Information\\n\\nWhat college is the home of the Data Science Minor?\\n\\nThe Data Science Minor is a trans-institutional minor, shared by A&S, Blair, Engineering, and Peabody.\\n\\nI am an A&S student. Do DS courses count as A&S courses?', metadata={'source': 'https://www.vanderbilt.edu/undergrad-datascience/faq'}),\n",
574
+ " Document(page_content='I am an A&S student. Do DS courses count as A&S courses?\\n\\nAll courses with a DS prefix count as courses within each of the colleges, including A&S. If you are an A&S student, and are taking a course that is cross-listed, make sure you enroll in the one with the DS prefix. Electives outside of A&S without the DS prefix will generally not count as A&S courses, so plan accordingly.\\n\\nWhat are the unique credit hour rules for the Data Science Minor?\\n\\nStudents electing an undergraduate minor in Data Science must follow academic regulations regarding minors in their home college, including but not limited to regulations regarding unique hours. The unique credit hour rule is specific to the College of Arts and Science and Peabody College. The School of Engineering and Blair School of Music do not have a unique credit hour rule. The Data Science minor cannot waive this rule. Please talk with your academic advisor about how to satisfy these requirements.\\n\\nInfo About the Courses\\n\\nDS 1000\\n\\nThank you for your interest in DS 1000! The course is full for the fall 2021 semester. Due to student demand and the transinstitutional nature of the course, we cannot make special exceptions as to which students, if any, on the waitlist are able to enroll. DS 1000 will be offered again in the spring semester.\\n\\nWhat computer programming course should I take?', metadata={'source': 'https://www.vanderbilt.edu/undergrad-datascience/faq'}),\n",
575
+ " Document(page_content='What computer programming course should I take?\\n\\nSee What Programming Course To Take? In general, students interested in data science and scientific computing (not in computer science per se) should learn Python (and R).\\n\\nHow do I find courses approved for the data science minor on YES?\\n\\nOn YES, to select all courses approved for credit in the Data Science minor offered in a given semester, select the “Advanced” link next to the search box, select the “Class Attributes” drop-down box on the bottom right of the advanced search page, and then select “Eligible for Data Science” to find all courses. (Note that these course tags will not all be in place on YES until the registration period for Fall 2021 begins.)\\n\\nCan other courses, besides those listed, count towards the Data Science Minor?\\n\\nNew courses, special topics courses, or graduate-level courses that seem related to data science could count as electives. Contact the Director of Undergraduate Data Science to request consideration.\\n\\nWhy doesn’t CS 1104 count towards the Data Science Minor?', metadata={'source': 'https://www.vanderbilt.edu/undergrad-datascience/faq'}),\n",
576
+ " Document(page_content='Why doesn’t CS 1104 count towards the Data Science Minor?\\n\\nIt does, as a prerequisite to CS 2204, which counts towards the minor. CS / DS 1100 was created as a new single-semester programming course for the Data Science Minor. It roughly has 2/3 the content of CS 1104 and 1/3 the content of CS 2204. While CS / DS 1100 counts as a single semester of programming for the minor, we strongly encourage students interested in data science, and in using data science tools and techniques, to take two semesters of programming in Python (CS / DS 1100 or CS 1104, followed by CS 2204). If you have taken CS 1104, you can take CS 1100, but you will only receive a total of four credits for\\xa0the two courses. See also What Programming Course To Take?\\n\\nI see that after having taken CS 1104, I can take CS/DS 1100 instead of taking CS 2204. What are the downsides of doing so?', metadata={'source': 'https://www.vanderbilt.edu/undergrad-datascience/faq'}),\n",
577
+ " Document(page_content='I see that after having taken CS 1104, I can take CS/DS 1100 instead of taking CS 2204. What are the downsides of doing so?\\n\\nAfter taking CS 1104, we do recommend you take CS 2204. If you are interested in data science, a broader experience in Python in desirable (in fact, we recommend that students having taken CS 1100 try to take CS 2204 as well). CS/DS 1100 and 1104 have significant overlap (both are introductions to programming using Python). That said, it is permissible to take CS/DS 1100 after having taken CS 1104. You will only get 1 (out of 3) credit hours for CS/DS 1100 (after having taken CS 1104), but the combination of CS/DS 1100 and 1104 will satisfy the DS minor programming requirement. Note that if you enroll in three 3-hour courses and CS/DS 1100 (after having taken CS 1104) it will look like you are registered for 12 credit hours during registration and at the start of the semester, but your credit hours will be reduced to only 10 credit hours (because the credits for CS/DS 1100 will be cut back to 1 after the add/drop period). Enrolling in fewer than 12 credit hours can have significant consequences on financial aid and potentially on visa status for international students. Please be mindful of this.\\n\\nWhat is the difference between CS 1100 and DS 1100?\\n\\nNothing. They are the same course. They meet the same time in the same place and are taught by the same instructor. They are just cross-listed.', metadata={'source': 'https://www.vanderbilt.edu/undergrad-datascience/faq'}),\n",
578
+ " Document(page_content='Nothing. They are the same course. They meet the same time in the same place and are taught by the same instructor. They are just cross-listed.\\n\\nI have taken CS 1101. What computer programming course should I take next?\\n\\nYou have two options. You can either take CS 2201 (in C++) or take CS 1100 (in Python). Of course, you could also take CS 1104 and 2204 (in Python). CS 1100, 2201, and 2204 all satisfy the programming requirement for the minor. Note that CS 2201 is a prerequisite for many upper-level CS courses (as well as required for the CS major and minor). For more information, see What Programming Course To Take?\\n\\nECON 3750 and MATH 3670 are listed both as satisfying the core machine learning requirement and as electives. If I take one, will it double-count for both requirements?\\n\\nNo. They are listed under both because a student who takes one of the other machine learning\\xa0courses to satisfy the core requirement (CS/DS 3262 or CS 4262) can also take ECON 3750 or MATH 3670 as an elective; the content is sufficiently different that both can count towards the minor, but one course cannot double-count for two minor requirements.\\n\\nCan I take ECON 3750 or MATH 3670 as an elective if I have already taken CS 3262 or CS 4262?', metadata={'source': 'https://www.vanderbilt.edu/undergrad-datascience/faq'}),\n",
579
+ " Document(page_content='Can I take ECON 3750 or MATH 3670 as an elective if I have already taken CS 3262 or CS 4262?\\n\\nYes (see above). ECON 3750 and MATH 3670 are sufficiently different from CS 3262 or CS 4262 (and from each other) that you can take these as electives. In fact, you could take ECON 3750 to satisfy the machine learning requirement and then take MATH 3670 as an elective.\\n\\nCS 3262 can count towards the Data Science minor. CS 3262 does not count directly towards the Computer Science major requirements but could be used as either a tech elective or open elective for Computer Science majors.\\n\\nWhy doesn’t MATH 2820 count towards the Data Science Minor?\\n\\nIt does, as a prerequisite to MATH 2821, which counts towards the minor. The two-course sequence of MATH 2820 and MATH 2821 counts towards the Data Science Minor; the\\xa0two-course sequence is required because MATH 2820 goes deep into mathematical foundations of probability ad statistics concepts, but does not by itself cover the breadth of topics of other introductory statistics courses. This two-course sequence provides an excellent introduction to mathematical statistics.\\n\\nResearch and Immersion Information\\n\\nCan I do research for course credit?\\n\\nYes, you can do research for course credit (including DS 3850). More information can be found here: https://www.vanderbilt.edu/undergrad-datascience/ds-3850-research-in-data-science/\\n\\nI am interested in the Undergraduate Data Science Immersion Program. How can I participate.', metadata={'source': 'https://www.vanderbilt.edu/undergrad-datascience/faq'}),\n",
580
+ " Document(page_content='I am interested in the Undergraduate Data Science Immersion Program. How can I participate.\\n\\nSome competitive summer immersion programs include DSI-SPR and Data Science for Social Good (DSSG). More information can be found on the following websites.\\n\\nhttps://www.vanderbilt.edu/datascience/academics/undergraduate/summer-research-program/\\n\\nhttps://www.vanderbilt.edu/datascience/data-science-for-social-good/\\n\\nTo get involved in data-science-oriented research with a faculty member, you will need to reach out to the faculty member. Pointers can be found here: https://www.vanderbilt.edu/undergrad-datascience/research-and-immersion-overview/. Having that research count towards the immersion requirement will be between your faculty mentor and your faculty immersion coordinator.\\n\\nAdditional information about research opportunities will be posted on the website in the future.\\n\\nContact\\n\\nHow do I ask a question about the Data Science?\\n\\nIf you have questions about the Data Science Minor or Immersion opportunities in data science, please email us: [email protected]\\n\\nTo whom can I petition if the Director denies my request?\\n\\nThe Governing Board of the Data Science Minor acts as the college-level oversight body for this trans-institutional minor and would be the appropriate next step for petitions related to the minor.\\n\\nData Science News\\n\\nOpportunities for Capstone Projects and Research Experience\\n\\nAttention Graduate Students! We’re Hiring!', metadata={'source': 'https://www.vanderbilt.edu/undergrad-datascience/faq'}),\n",
581
+ " Document(page_content='Data Science News\\n\\nOpportunities for Capstone Projects and Research Experience\\n\\nAttention Graduate Students! We’re Hiring!\\n\\nVanderbilt student-athlete drives sports performance through data analysis\\n\\nNew Course: DS 3891 Special Topics: Intro to Generative AI\\n\\nNow Accepting Applications: DS Minor Teaching Fellowship for graduate students\\n\\nJoin Our Team: Student Worker Positions Available for Fall 2023 Semester!\\n\\nVIEW MORE EVENTS >\\n\\nYour Vanderbilt\\n\\nAlumni\\n\\nCurrent Students\\n\\nFaculty & Staff\\n\\nInternational Students\\n\\nMedia\\n\\nParents & Family\\n\\nProspective Students\\n\\nResearchers\\n\\nSports Fans\\n\\nVisitors & Neighbors\\n\\nQuick Links\\n\\nPeopleFinder\\n\\nLibraries\\n\\nNews\\n\\nCalendar\\n\\nMaps\\n\\nA-Z\\n\\n©\\n Site Development: Digital Strategies (Division of Communications)\\n Vanderbilt University is committed to principles of equal opportunity and affirmative action. Accessibility information. Vanderbilt®, Vanderbilt University®, V Oak Leaf Design®, Star V Design® and Anchor Down® are trademarks of The Vanderbilt University', metadata={'source': 'https://www.vanderbilt.edu/undergrad-datascience/faq'})]"
582
+ ]
583
+ },
584
+ "execution_count": null,
585
+ "metadata": {},
586
+ "output_type": "execute_result"
587
+ }
588
+ ],
589
+ "source": [
590
+ "# get the results\n",
591
+ "res_unstructured = website_to_text_unstructured(demo_urls)\n",
592
+ "res_unstructured"
593
+ ]
594
+ },
595
+ {
596
+ "cell_type": "code",
597
+ "execution_count": null,
598
+ "metadata": {},
599
+ "outputs": [],
600
+ "source": [
601
+ "#unit testb\n",
602
+ "test_converters_inputs(website_to_text_unstructured, demo_urls)"
603
+ ]
604
+ },
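+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a rough, illustrative sanity check (not exported), we can count the newline characters produced by each loader using the `res_web` and `res_unstructured` results computed above:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Illustrative only: compare newline density between the two loaders\n",
+ "web_newlines = sum(doc.page_content.count('\\n') for doc in res_web)\n",
+ "unstructured_newlines = sum(doc.page_content.count('\\n') for doc in res_unstructured)\n",
+ "print('WebBaseLoader newline count:', web_newlines)\n",
+ "print('UnstructuredURLLoader newline count:', unstructured_newlines)"
+ ]
+ },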
605
+ {
606
+ "cell_type": "markdown",
607
+ "metadata": {},
608
+ "source": [
609
+ "We also see here that there's something to be said about the unstructured approach which appears to be more conservative in the number of newline characters but still appears to preserve content. However, the gain is not overly significant.\n",
610
+ "\n",
611
+ "#### Trafilatura Parsing\n",
612
+ "\n",
613
+ "[Tralifatura](https://trafilatura.readthedocs.io/en/latest/) is a Python and command-line utility which attempts to extracts the most relevant information from a given website. "
614
+ ]
615
+ },
616
+ {
617
+ "cell_type": "code",
618
+ "execution_count": null,
619
+ "metadata": {},
620
+ "outputs": [],
621
+ "source": [
622
+ "def website_trafilatura(url):\n",
623
+ " downloaded = trafilatura.fetch_url(url)\n",
624
+ " return trafilatura.extract(downloaded)"
625
+ ]
626
+ },
627
+ {
628
+ "cell_type": "code",
629
+ "execution_count": null,
630
+ "metadata": {},
631
+ "outputs": [
632
+ {
633
+ "name": "stdout",
634
+ "output_type": "stream",
635
+ "text": [
636
+ "Total number of characters in example: 1565 \n",
637
+ "\n"
638
+ ]
639
+ },
640
+ {
641
+ "data": {
642
+ "text/plain": [
643
+ "'|\\n|\\n|\\n|\\n|\\n||\\n|\\n|\\n||\\nPHI\\nMIA\\n||\\n56-49\\n57-49\\n||\\n||\\n||\\n||\\n||\\n6:40 PM ET\\n||\\n|\\n|\\n|\\n|\\n|\\n|\\n||\\n|\\n|\\n||\\nMIL\\nWSH\\n||\\n57-49\\n44-62\\n||\\n||\\n||\\n||\\n||\\n7:05 PM ET\\n||\\n|\\n|\\n|\\n|\\n|\\n|\\n||\\n|\\n|\\n||\\nTB\\nNYY\\n||\\n64-44\\n55-50\\n||\\n||\\n||\\n||\\n||\\n7:05 PM ET\\n||\\n|\\n|\\n|\\n|\\n|\\n|\\n||\\n|\\n|\\n||\\nBAL\\nTOR\\n||\\n64-41\\n59-47\\n||\\n||\\n||\\n||\\n||\\n7:07 PM ET\\n||\\n|\\n|\\n|\\n|\\n|\\n|\\n||\\n|\\n|\\n||\\nLAA\\nATL\\n||\\n55-51\\n67-36\\n||\\n||\\n||\\n||\\n||\\n7:20 PM ET\\n||\\n|\\n|\\n|\\n|\\n|\\n|\\n||\\n|\\n|\\n||\\nCIN\\nCHC\\n||\\n58-49\\n53-52\\n||\\n||\\n||\\n||\\n||\\n8:05 PM ET\\n||\\n|\\n|\\n|\\n|\\n|\\n|\\n||\\n|\\n|\\n||\\nCLE\\nHOU\\n||\\n53-53\\n59-47\\n||\\n||\\n||\\n||\\n||\\n8:10 PM ET\\n||\\n|\\n|\\n|\\n|\\n|\\n|\\n||\\n|\\n|\\n||\\nSD\\nCOL\\n||\\n52-54\\n41-64\\n||\\n||\\n||\\n||\\n||\\n8:40 PM ET\\n||\\n|\\n|\\n|\\n|\\n|\\n|\\n||\\n|\\n|\\n||\\nBOS\\nSEA\\n||\\n56-49\\n54-51\\n||\\n||\\n||\\n||\\n||\\n9:40 PM ET\\n||\\n|\\n|\\n|\\n|\\n|\\n|\\n||\\n|\\n|\\n||\\nARI\\nSF\\n||\\n56-50\\n58-48\\n||\\n||\\n||\\n||\\n||\\n9:45 PM ET\\n||\\n|\\n|\\n|\\n|\\n|\\n|\\n||\\n|\\n|\\n||\\nJPN\\nESP\\n||\\n4\\n0\\n||\\n||\\n||\\n||\\n||\\nFT\\n||\\n|\\n|\\n|\\n|\\n|\\n|\\n||\\n|\\n|\\n||\\nCRC\\nZAM\\n||\\n1\\n3\\n||\\n||\\n||\\n||\\n||\\nFT\\n||\\n|\\n|\\n|\\n|\\n|\\n|\\n||\\n|\\n|\\n||\\nCAN\\nAUS\\n||\\n0\\n4\\n||\\n||\\n||\\n||\\n||\\nFT\\n||\\n|\\n|\\n|\\n|\\n|\\n|\\n||\\n|\\n|\\n||\\nIRL\\nNGA\\n||\\n0\\n0\\n||\\n||\\n||\\n||\\n||\\nFT\\n||\\n|\\n|\\n|\\n|\\n|\\n|\\n||\\n|\\n|\\n||\\nPOR\\nUSA\\n||\\nWLDDL\\nDWWWW\\n||\\n||\\n||\\n||\\n||\\n3:00 AM ET\\n||\\n|\\n|\\n|\\n|\\n|\\n|\\n||\\n|\\n|\\n||\\nVIE\\nNED\\n||\\nLLLLL\\nDWWWL\\n||\\n||\\n||\\n||\\n||\\n3:00 AM ET\\n||\\n|\\n|\\n|\\n|\\n|\\n|\\n||\\n|\\n|\\n||\\nCHN\\nENG\\n||\\nWLDWW\\nWWDLW\\n||\\n||\\n||\\n||\\n||\\n7:00 AM ET\\n||\\n|\\n|\\n|\\n|\\n|\\n|\\n||\\n|\\n|\\n||\\nHAI\\nDEN\\n||\\nLLLWL\\nLWLWW\\n||\\n||\\n||\\n||\\n||\\n7:00 AM ET\\n||\\n|\\n|\\n|\\n|\\n|\\n|\\n||\\n|\\n|\\n||\\nAME\\nCLB\\n||\\nWWLLW\\nWLDDW\\n||\\n||\\n||\\n||\\n||\\n8:00 PM ET\\n||\\n|\\n|\\n|\\n|\\n|\\n|\\n||\\n|\\n|\\n||\\nPUE\\nCHI\\n||\\nLLLDL\\nWWWWL\\n||\\n||\\n||\\n||\\n||\\n8:00 PM ET\\n||\\n|\\n|\\n|\\n|\\n|\\n|\\n||\\n|\\n|\\n||\\nTOL\\nCOL\\n||\\nWLWDW\\nLDDWL\\n||\\n||\\n||\\n||\\n||\\n9:30 PM ET\\n||\\n|\\n|\\n|\\n|\\n|\\n|\\n||\\n|\\n|\\n||\\nGDL\\nSKC\\n||\\nLWWWL\\nLLDDW\\n||\\n||\\n||\\n||\\n||\\n10:00 PM ET\\n||\\n|\\n|\\n|'"
644
+ ]
645
+ },
646
+ "execution_count": null,
647
+ "metadata": {},
648
+ "output_type": "execute_result"
649
+ }
650
+ ],
651
+ "source": [
652
+ "trafilatura_text = website_trafilatura(demo_urls[0])\n",
653
+ "print('Total number of characters in example:', len(trafilatura_text), '\\n')\n",
654
+ "trafilatura_text"
655
+ ]
656
+ },
657
+ {
658
+ "cell_type": "markdown",
659
+ "metadata": {},
660
+ "source": [
661
+ "This output is SUBSTANTIALLY shorter with a length of 1565 characters. However, the problem is that the main article on the page actually isn't captured at all.\n",
662
+ "\n",
663
+ "#### jusText\n",
664
+ "\n",
665
+ "[jusText](https://pypi.org/project/jusText/) is another Python library for extracting content from a website."
666
+ ]
667
+ },
668
+ {
669
+ "cell_type": "code",
670
+ "execution_count": null,
671
+ "metadata": {},
672
+ "outputs": [],
673
+ "source": [
674
+ "def website_justext(url):\n",
675
+ " response = requests.get(url)\n",
676
+ " paragraphs = justext.justext(response.content, justext.get_stoplist(\"English\"))\n",
677
+ " content = [paragraph.text for paragraph in paragraphs \\\n",
678
+ " if not paragraph.is_boilerplate]\n",
679
+ " text = \" \".join(content)\n",
680
+ " return text"
681
+ ]
682
+ },
683
+ {
684
+ "cell_type": "code",
685
+ "execution_count": null,
686
+ "metadata": {},
687
+ "outputs": [
688
+ {
689
+ "data": {
690
+ "text/plain": [
691
+ "''"
692
+ ]
693
+ },
694
+ "execution_count": null,
695
+ "metadata": {},
696
+ "output_type": "execute_result"
697
+ }
698
+ ],
699
+ "source": [
700
+ "# Ensure behavior\n",
701
+ "justext_text = website_justext(demo_urls[0])\n",
702
+ "justext_text"
703
+ ]
704
+ },
705
+ {
706
+ "cell_type": "code",
707
+ "execution_count": null,
708
+ "metadata": {},
709
+ "outputs": [
710
+ {
711
+ "data": {
712
+ "text/plain": [
713
+ "'Declaring the Minor While minor declarations can be made any time, DS courses will give some preference to students who have officially declared the Data Science Minor. So we recommend declaring the minor sooner rather than later. It is always possible to drop a declared minor. Minor declarations must be submitted at least two weeks before registration begins. Otherwise, the minor declaration will not be processed until after registration. No preference will be given during registration for an “intent” to declare because the minor declaration was made too late. First, preference for students who have declared the minor only applies to DS courses, not other courses. Second, if you declared the minor within two weeks of registration, your minor declaration will. not show up on YES, and you will not have preference. Third, while we try to hold as many seats for students who have declared the minor as we can, not all seats are reserved. Yes. While A&S students are usually prevented from declaring a major or minor until sophomore year, first-year A&S students can declare the Data Science Minor. As noted in the previous question, this can be important to do since some popular core DS courses will give some preference to students who have officially declared Data Science as a minor. Juniors must contact the Director of Undergraduate Data Science to discuss options. DS 1000 is not open to current juniors (rising seniors). DS 3100 will not be taught next year (Fall 2021 or Spring 2022) and will need to be suitably replaced, which will require an approved plan from the Director. Furthermore, while DS / CS 3262 is current slated to be taught Spring 2022, that is not fully guaranteed, so students should see if they can take one of the other machine learning options. Rising seniors and current seniors can only register for DS 1000 if there are available seats immediately before the semester begins with permission of the instructor. DS 1000 is intended as an introduction to data science for first years and sophomores, which is why this restriction is in place. All courses with a DS prefix count as courses within each of the colleges, including A&S. If you are an A&S student, and are taking a course that is cross-listed, make sure you enroll in the one with the DS prefix. Electives outside of A&S without the DS prefix will generally not count as A&S courses, so plan accordingly. Students electing an undergraduate minor in Data Science must follow academic regulations regarding minors in their home college, including but not limited to regulations regarding unique hours. The unique credit hour rule is specific to the College of Arts and Science and Peabody College. The School of Engineering and Blair School of Music do not have a unique credit hour rule. The Data Science minor cannot waive this rule. Please talk with your academic advisor about how to satisfy these requirements. Info About the Courses Thank you for your interest in DS 1000! The course is full for the fall 2021 semester. Due to student demand and the transinstitutional nature of the course, we cannot make special exceptions as to which students, if any, on the waitlist are able to enroll. DS 1000 will be offered again in the spring semester. 
On YES, to select all courses approved for credit in the Data Science minor offered in a given semester, select the “Advanced” link next to the search box, select the “Class Attributes” drop-down box on the bottom right of the advanced search page, and then select “Eligible for Data Science” to find all courses. (Note that these course tags will not all be in place on YES until the registration period for Fall 2021 begins.) It does, as a prerequisite to CS 2204, which counts towards the minor. CS / DS 1100 was created as a new single-semester programming course for the Data Science Minor. It roughly has 2/3 the content of CS 1104 and 1/3 the content of CS 2204. While CS / DS 1100 counts as a single semester of programming for the minor, we strongly encourage students interested in data science, and in using data science tools and techniques, to take two semesters of programming in Python (CS / DS 1100 or CS 1104, followed by CS 2204). If you have taken CS 1104, you can take CS 1100, but you will only receive a total of four credits for the two courses. See also What Programming Course To Take? After taking CS 1104, we do recommend you take CS 2204. If you are interested in data science, a broader experience in Python in desirable (in fact, we recommend that students having taken CS 1100 try to take CS 2204 as well). CS/DS 1100 and 1104 have significant overlap (both are introductions to programming using Python). That said, it is permissible to take CS/DS 1100 after having taken CS 1104. You will only get 1 (out of 3) credit hours for CS/DS 1100 (after having taken CS 1104), but the combination of CS/DS 1100 and 1104 will satisfy the DS minor programming requirement. Note that if you enroll in three 3-hour courses and CS/DS 1100 (after having taken CS 1104) it will look like you are registered for 12 credit hours during registration and at the start of the semester, but your credit hours will be reduced to only 10 credit hours (because the credits for CS/DS 1100 will be cut back to 1 after the add/drop period). Enrolling in fewer than 12 credit hours can have significant consequences on financial aid and potentially on visa status for international students. Please be mindful of this. You have two options. You can either take CS 2201 (in C++) or take CS 1100 (in Python). Of course, you could also take CS 1104 and 2204 (in Python). CS 1100, 2201, and 2204 all satisfy the programming requirement for the minor. Note that CS 2201 is a prerequisite for many upper-level CS courses (as well as required for the CS major and minor). For more information, see What Programming Course To Take? No. They are listed under both because a student who takes one of the other machine learning courses to satisfy the core requirement (CS/DS 3262 or CS 4262) can also take ECON 3750 or MATH 3670 as an elective; the content is sufficiently different that both can count towards the minor, but one course cannot double-count for two minor requirements. Yes (see above). ECON 3750 and MATH 3670 are sufficiently different from CS 3262 or CS 4262 (and from each other) that you can take these as electives. In fact, you could take ECON 3750 to satisfy the machine learning requirement and then take MATH 3670 as an elective. CS 3262 can count towards the Data Science minor. CS 3262 does not count directly towards the Computer Science major requirements but could be used as either a tech elective or open elective for Computer Science majors. It does, as a prerequisite to MATH 2821, which counts towards the minor. 
The two-course sequence of MATH 2820 and MATH 2821 counts towards the Data Science Minor; the two-course sequence is required because MATH 2820 goes deep into mathematical foundations of probability ad statistics concepts, but does not by itself cover the breadth of topics of other introductory statistics courses. This two-course sequence provides an excellent introduction to mathematical statistics.'"
714
+ ]
715
+ },
716
+ "execution_count": null,
717
+ "metadata": {},
718
+ "output_type": "execute_result"
719
+ }
720
+ ],
721
+ "source": [
722
+ "# Try a different URL to see if behavior improves\n",
723
+ "justext_text = website_justext(demo_urls[1])\n",
724
+ "justext_text"
725
+ ]
726
+ },
727
+ {
728
+ "cell_type": "markdown",
729
+ "metadata": {},
730
+ "source": [
731
+ "Here, we see that we may prefer to stick with the langchain implementations. The first jusText example returned an empty string, although previous work demonstrates that on a different day, it worked well (note that the ESPN's content was different). With the second URL, parts of the website, particularly the headers, is actually missing."
732
+ ]
733
+ },
734
+ {
735
+ "cell_type": "markdown",
736
+ "metadata": {},
737
+ "source": [
738
+ "## Creating Document Segments\n",
739
+ "Now, the precursor to creating vector stores/embeddings is to create document segments. Since we have a variety of sources, we will keep this in mind as we develop the following function.\n",
740
+ "\n",
741
+ ":::{.callout-warning}\n",
742
+ "Note that the `get_document_segments` currently is meant to be used in one single pass with `context_info` being all of a single file type. [Issue #150](https://github.com/vanderbilt-data-science/lo-achievement/issues/150) is meant to expand this functionality so that if many files are uploaded, the software will be able to handle this.\n",
743
+ ":::"
744
+ ]
745
+ },
746
+ {
747
+ "cell_type": "code",
748
+ "execution_count": null,
749
+ "metadata": {},
750
+ "outputs": [],
751
+ "source": [
752
+ "#| export\n",
753
+ "def get_document_segments(context_info, data_type, chunk_size = 1500, chunk_overlap=100):\n",
754
+ "\n",
755
+ " load_fcn = None\n",
756
+ " addtnl_params = {'chunk_size': chunk_size, 'chunk_overlap': chunk_overlap}\n",
757
+ "\n",
758
+ " # Define function use to do the loading\n",
759
+ " if data_type == 'text':\n",
760
+ " load_fcn = rawtext_to_doc_split\n",
761
+ " elif data_type == 'web_page':\n",
762
+ " load_fcn = website_to_text_unstructured\n",
763
+ " elif data_type == 'youtube_video':\n",
764
+ " load_fcn = youtube_to_text\n",
765
+ " else:\n",
766
+ " load_fcn = files_to_text\n",
767
+ " \n",
768
+ " # Get the document segments\n",
769
+ " doc_segments = load_fcn(context_info, **addtnl_params)\n",
770
+ "\n",
771
+ " return doc_segments"
772
+ ]
773
+ },
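+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick illustrative sketch (not exported), the same entry point can dispatch on `data_type`; here we reuse the `demo_urls` from earlier in this notebook to segment web pages:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Illustrative only: dispatch on data_type using the demo_urls defined earlier in this notebook\n",
+ "web_segments = get_document_segments(demo_urls, data_type='web_page', chunk_size=1500, chunk_overlap=100)\n",
+ "len(web_segments)"
+ ]
+ },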
774
+ {
775
+ "cell_type": "markdown",
776
+ "metadata": {},
777
+ "source": [
778
+ "## Creating Vector Stores from Document Segments\n",
779
+ "The last step here will be in the creation of vector stores from the provided document segments. We will allow for the usage of either Chroma or DeepLake and enforce OpenAIEmbeddings."
780
+ ]
781
+ },
782
+ {
783
+ "cell_type": "code",
784
+ "execution_count": null,
785
+ "metadata": {},
786
+ "outputs": [],
787
+ "source": [
788
+ "#| export\n",
789
+ "def create_local_vector_store(document_segments, **retriever_kwargs):\n",
790
+ " embeddings = OpenAIEmbeddings()\n",
791
+ " db = Chroma.from_documents(document_segments, embeddings)\n",
792
+ " retriever = db.as_retriever(**retriever_kwargs)\n",
793
+ " \n",
794
+ " return db, retriever"
795
+ ]
796
+ },
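+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The exported helper above uses Chroma. As a sketch only (assuming langchain's `DeepLake` vector store and a local `dataset_path`; this variant is neither exported nor tested here), a DeepLake-backed version might look like the following:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sketch only: a DeepLake-backed variant of the helper above (assumes langchain's DeepLake vector store)\n",
+ "from langchain.vectorstores import DeepLake\n",
+ "\n",
+ "def create_deeplake_vector_store(document_segments, dataset_path='./deeplake_vector_store', **retriever_kwargs):\n",
+ "    embeddings = OpenAIEmbeddings()\n",
+ "    db = DeepLake.from_documents(document_segments, embeddings, dataset_path=dataset_path)\n",
+ "    retriever = db.as_retriever(**retriever_kwargs)\n",
+ "\n",
+ "    return db, retriever"
+ ]
+ },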
797
+ {
798
+ "cell_type": "markdown",
799
+ "metadata": {},
800
+ "source": [
801
+ "### Unit test of vector store and segment creation"
802
+ ]
803
+ },
804
+ {
805
+ "cell_type": "code",
806
+ "execution_count": null,
807
+ "metadata": {},
808
+ "outputs": [],
809
+ "source": [
810
+ "from langchain.chat_models import ChatOpenAI\n",
811
+ "from getpass import getpass"
812
+ ]
813
+ },
814
+ {
815
+ "cell_type": "code",
816
+ "execution_count": null,
817
+ "metadata": {},
818
+ "outputs": [],
819
+ "source": [
820
+ "openai_api_key = getpass()\n",
821
+ "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n",
822
+ "\n",
823
+ "llm = ChatOpenAI(model_name = 'gpt-3.5-turbo-16k')"
824
+ ]
825
+ },
826
+ {
827
+ "cell_type": "code",
828
+ "execution_count": null,
829
+ "metadata": {},
830
+ "outputs": [],
831
+ "source": [
832
+ "test_files = ['../roadnottaken.txt', '../2302.11382.pdf']\n",
833
+ "\n",
834
+ "#get vector store\n",
835
+ "segs = get_document_segments(test_files, data_type='other', chunk_size = 1000, chunk_overlap = 100)\n",
836
+ "chroma_db, vs_retriever = create_local_vector_store(segs)\n",
837
+ "\n",
838
+ "#create test retrievalqa\n",
839
+ "qa_chain = RetrievalQA.from_chain_type(llm=openai_llm, chain_type=\"stuff\", retriever=vs_retriever)"
840
+ ]
841
+ },
842
+ {
843
+ "cell_type": "code",
844
+ "execution_count": null,
845
+ "metadata": {},
846
+ "outputs": [
847
+ {
848
+ "data": {
849
+ "text/plain": [
850
+ "[Document(page_content='Two roads diverged in a yellow wood,\\rAnd sorry I could not travel both\\rAnd be one traveler, long I stood\\rAnd looked down one as far as I could\\rTo where it bent in the undergrowth;\\r\\rThen took the other, as just as fair,\\rAnd having perhaps the better claim,\\rBecause it was grassy and wanted wear;\\rThough as for that the passing there\\rHad worn them really about the same,\\r\\rAnd both that morning equally lay\\rIn leaves no step had trodden black. Oh, I kept the first for another day! Yet knowing how way leads on to way,\\rI doubted if I should ever come back. I shall be telling this with a sigh\\rSomewhere ages and ages hence:\\rTwo roads diverged in a wood, and IэI took the one less traveled by,\\rAnd that has made all the difference.', metadata={'source': '../roadnottaken.txt', 'start_index': 0}),\n",
851
+ " Document(page_content='any unnecessary steps,” is useful in flagging inaccuracies in the user’s original request so that the final recipe is efficient.', metadata={'source': '../2302.11382.pdf', 'start_index': 92662}),\n",
852
+ " Document(page_content='The third statement provides an optional way for the user to stop the output generation process. This step is not always needed, but can be useful in situations where there may be the potential for ambiguity regarding whether or not the user- provided input between inputs is meant as a refinement for the next generation or a command to stop. For example, an explicit stop phrase could be created if the user was generating data related to road signs, where the user might want to enter a refinement of the generation like “stop” to indicate that a stop sign should be added to the output.', metadata={'source': '../2302.11382.pdf', 'start_index': 72043}),\n",
853
+ " Document(page_content='“When I ask you a question, generate three addi- tional questions that would help you give a more accurate answer. Assume that I know little about the topic that we are discussing and please define any terms that are not general knowledge. When I have answered the three questions, combine the answers to produce the final answers to my original question.”\\n\\nOne point of variation in this pattern is where the facts are output. Given that the facts may be terms that the user is not familiar with, it is preferable if the list of facts comes after the output. This after-output presentation ordering allows the user to read and understand the statements before seeing what statements should be checked. The user may also determine additional facts prior to realizing the fact list at the end should be checked.', metadata={'source': '../2302.11382.pdf', 'start_index': 57473})]"
854
+ ]
855
+ },
856
+ "execution_count": null,
857
+ "metadata": {},
858
+ "output_type": "execute_result"
859
+ }
860
+ ],
861
+ "source": [
862
+ "# check for functionality\n",
863
+ "chroma_db.similarity_search('The street was forked and I did not know which way to go')"
864
+ ]
865
+ },
866
+ {
867
+ "cell_type": "code",
868
+ "execution_count": null,
869
+ "metadata": {},
870
+ "outputs": [],
871
+ "source": [
872
+ "#check qa chain for functionality\n",
873
+ "ans = qa_chain({'question':'What is the best prompt to use when I want the model to take on a certain attitude of a person?'})"
874
+ ]
875
+ },
876
+ {
877
+ "cell_type": "code",
878
+ "execution_count": null,
879
+ "metadata": {},
880
+ "outputs": [
881
+ {
882
+ "data": {
883
+ "text/plain": [
884
+ "{'question': 'What is the best prompt to use when I want the model to take on a certain attitude of a person?',\n",
885
+ " 'answer': 'The best prompt to use when you want the model to take on a certain attitude of a person is to provide a persona for the model to embody. This can be expressed as a job description, title, fictional character, historical figure, or any other attributes associated with a well-known type of person. The prompt should specify the outputs that this persona would create. Additionally, personas can also represent inanimate or non-human entities, such as a Linux terminal or a database. In this case, the prompt should specify how the inputs should be delivered to the entity and what outputs the entity should produce. It is also possible to provide a better version of the question and prompt the model to ask if the user would like to use the better version instead.\\n',\n",
886
+ " 'sources': '../2302.11382.pdf',\n",
887
+ " 'source_documents': [Document(page_content='4) Example Implementation: A sample prompt for a flipped\\n\\ninteraction is shown below:\\n\\n“From now on, I would like you to ask me questions to deploy a Python application to AWS. When you have enough information to deploy the application, create a Python script to automate the deployment.”\\n\\n2) Motivation: Users may not know what types of outputs or details are important for an LLM to focus on to achieve a given task. They may know, however, the role or type of person that they would normally ask to get help with these things. The Persona pattern enables the users to express what they need help with without knowing the exact details of the outputs they need.', metadata={'source': '../2302.11382.pdf', 'start_index': 36397}),\n",
888
+ " Document(page_content='ments:\\n\\nContextual Statements Act as persona X Provide outputs that persona X would create\\n\\nThe first statement conveys the idea that the LLM needs to act as a specific persona and provide outputs that such a persona would. This persona can be expressed in a number of ways, ranging from a job description, title, fictional char- acter, historical figure, etc. The persona should elicit a set of attributes associated with a well-known job title, type of person, etc.2\\n\\n5) Consequences: One consideration when designing the prompt is how much to dictate to the LLM regarding what information to collect prior to termination. In the example above, the flipped interaction is open-ended and can vary sig- nificantly in the final generated artifact. This open-endedness makes the prompt generic and reusable, but may potentially ask additional questions that could be skipped if more context is given.', metadata={'source': '../2302.11382.pdf', 'start_index': 37872}),\n",
889
+ " Document(page_content='In this example, the LLM is instructed to provide outputs that a ”security reviewer” would. The prompt further sets the stage that code is going to be evaluated. Finally, the user refines the persona by scoping the persona further to outputs regarding the code.\\n\\nPersonas can also represent inanimate or non-human en- tities, such as a Linux terminal, a database, or an animal’s perspective. When using this pattern to represent these entities, it can be useful to also specify how you want the inputs delivered to the entity, such as “assume my input is what the owner is saying to the dog and your output is the sounds the dog is making”. An example prompt for a non-human entity that uses a “pretend to be” wording is shown below:\\n\\n“You are going to pretend to be a Linux terminal for a computer that has been compromised by an attacker. When I type in a command, you are going the Linux to output terminal would produce.”\\n\\nthe corresponding text\\n\\nthat', metadata={'source': '../2302.11382.pdf', 'start_index': 41330}),\n",
890
+ " Document(page_content='the corresponding text\\n\\nthat\\n\\nThis prompt is designed to simulate a computer that has been compromised by an attacker and is being controlled through a Linux terminal. The prompt specifies that the user will input commands into the terminal, and in response, the simulated terminal will output the corresponding text that would be produced by a real Linux terminal. This prompt is more prescriptive in the persona and asks the LLM to, not only be a Linux terminal, but to further act as a computer that has been compromised by an attacker.\\n\\n3) Structure and Key Ideas: Fundamental contextual state-\\n\\nments:\\n\\nContextual Statements Within scope X, suggest a better version of the question to use instead (Optional) prompt me if I would like to use the better version instead', metadata={'source': '../2302.11382.pdf', 'start_index': 42256})]}"
891
+ ]
892
+ },
893
+ "execution_count": null,
894
+ "metadata": {},
895
+ "output_type": "execute_result"
896
+ }
897
+ ],
898
+ "source": [
899
+ "#show result\n",
900
+ "ans"
901
+ ]
902
+ },
903
+ {
904
+ "cell_type": "markdown",
905
+ "metadata": {},
906
+ "source": [
907
+ "In conclusion, this is looking pretty solid. Let's leverage this functionality within the code base."
908
+ ]
909
+ }
910
+ ],
911
+ "metadata": {
912
+ "kernelspec": {
913
+ "display_name": "python3",
914
+ "language": "python",
915
+ "name": "python3"
916
+ }
917
+ },
918
+ "nbformat": 4,
919
+ "nbformat_minor": 2
920
+ }
lo-achievement/nbs/nbdev.yml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ project:
2
+ output-dir: _docs
3
+
4
+ website:
5
+ title: "ai_classroom_suite"
6
+ site-url: "https://vanderbilt-data-science.github.io/lo-achievement"
7
+ description: "A repository supporting enhanced instruction and grading using AI"
8
+ repo-branch: main
9
+ repo-url: "https://github.com/vanderbilt-data-science/lo-achievement"
lo-achievement/nbs/prompt_interaction_base.ipynb ADDED
@@ -0,0 +1,482 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# prompt_interaction_base.ipynb\n",
8
+ "> A notebook for formulating prompts and prompting\n",
9
+ "\n",
10
+ "In this notebook, we create some base functionality for creating prompts and getting answers for the LLMs in a simplified, unified way.\n",
11
+ "\n",
12
+ ":::{.callout-caution}\n",
13
+ "These notebooks are development notebooks, meaning that they are meant to be run locally or somewhere that supports navigating a full repository (in other words, not Google Colab unless you clone the entire repository to drive and then mount the Drive-Repository.) However, it is expected if you're able to do all of those steps, you're likely also able to figure out the required pip installs for development there.\n",
14
+ ":::\n"
15
+ ]
16
+ },
17
+ {
18
+ "cell_type": "raw",
19
+ "metadata": {},
20
+ "source": [
21
+ "---\n",
22
+ "skip_exec: true\n",
23
+ "---"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "execution_count": null,
29
+ "metadata": {},
30
+ "outputs": [],
31
+ "source": [
32
+ "#| default_exp PromptInteractionBase"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "execution_count": null,
38
+ "metadata": {},
39
+ "outputs": [],
40
+ "source": [
41
+ "#| export\n",
42
+ "from langchain.chat_models import ChatOpenAI\n",
43
+ "from langchain.llms import OpenAI\n",
44
+ "\n",
45
+ "from langchain import PromptTemplate\n",
46
+ "from langchain.prompts import ChatPromptTemplate, PromptTemplate\n",
47
+ "from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate\n",
48
+ "from langchain.chains import LLMChain, ConversationalRetrievalChain, RetrievalQAWithSourcesChain\n",
49
+ "from langchain.chains.base import Chain\n",
50
+ "\n",
51
+ "from getpass import getpass\n",
52
+ "\n",
53
+ "import os"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "markdown",
58
+ "metadata": {},
59
+ "source": [
60
+ "## Model and Authentication Setup\n",
61
+ "Here, we create functionality to authenticate the user when needed specifically using OpenAI models. Additionally, we create the capacity to make LLMChains and other chains using one unified interface."
62
+ ]
63
+ },
64
+ {
65
+ "cell_type": "code",
66
+ "execution_count": null,
67
+ "metadata": {},
68
+ "outputs": [],
69
+ "source": [
70
+ "#| export\n",
71
+ "def create_model(openai_mdl='gpt-3.5-turbo-16k', temperature=0.1, **chatopenai_kwargs):\n",
72
+ " llm = ChatOpenAI(model_name = openai_mdl, temperature=temperature, **chatopenai_kwargs)\n",
73
+ "\n",
74
+ " return llm"
75
+ ]
76
+ },
77
+ {
78
+ "cell_type": "code",
79
+ "execution_count": null,
80
+ "metadata": {},
81
+ "outputs": [],
82
+ "source": [
83
+ "#| export\n",
84
+ "def set_openai_key():\n",
85
+ " openai_api_key = getpass()\n",
86
+ " os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n",
87
+ "\n",
88
+ " return"
89
+ ]
90
+ },
91
+ {
92
+ "cell_type": "markdown",
93
+ "metadata": {},
94
+ "source": [
95
+ "**And now for a quick test of this functionality**"
96
+ ]
97
+ },
98
+ {
99
+ "cell_type": "code",
100
+ "execution_count": null,
101
+ "metadata": {},
102
+ "outputs": [],
103
+ "source": [
104
+ "set_openai_key()\n",
105
+ "assert os.environ[\"OPENAI_API_KEY\"], \"Either you didn't run set_openai_key or you haven't set it to something.\"\n",
106
+ "\n",
107
+ "chat_mdl = create_model()\n",
108
+ "assert isinstance(chat_mdl, ChatOpenAI), \"The default model type is currently ChatOpenAI. If that has changed, change this test.\""
109
+ ]
110
+ },
111
+ {
112
+ "cell_type": "markdown",
113
+ "metadata": {},
114
+ "source": [
115
+ "## Create chat prompt templates\n",
116
+ "Here, we'll create a tutor prompt template to help us with self-study and quizzing, and help create the student messages."
117
+ ]
118
+ },
119
+ {
120
+ "cell_type": "code",
121
+ "execution_count": null,
122
+ "metadata": {},
123
+ "outputs": [],
124
+ "source": [
125
+ "#| export\n",
126
+ "# Create system prompt template\n",
127
+ "SYSTEM_TUTOR_TEMPLATE = (\"You are a world-class tutor helping students to perform better on oral and written exams though interactive experiences. \" +\n",
128
+ " \"When assessing and evaluating students, you always ask one question at a time, and wait for the student's response before \" +\n",
129
+ " \"providing them with feedback. Asking one question at a time, waiting for the student's response, and then commenting \" +\n",
130
+ " \"on the strengths and weaknesses of their responses (when appropriate) is what makes you such a sought-after, world-class tutor.\")\n",
131
+ "\n",
132
+ "# Create a human response template\n",
133
+ "HUMAN_RESPONSE_TEMPLATE = (\"I'm trying to better understand the text provided below. {assessment_request} The learning objectives to be assessed are: \" +\n",
134
+ " \"{learning_objectives}. Although I may request more than one assessment question, you should \" +\n",
135
+ " \"only provide ONE question in you initial response. Do not include the answer in your response. \" +\n",
136
+ " \"If I get an answer wrong, provide me with an explanation of why it was incorrect, and then give me additional \" +\n",
137
+ " \"chances to respond until I get the correct choice. Explain why the correct choice is right. \" +\n",
138
+ " \"The text that you will base your questions on is as follows: {context}.\")\n",
139
+ "\n",
140
+ "HUMAN_RETRIEVER_RESPONSE_TEMPLATE = (\"I want to master the topics based on the excerpts of the text below. Given the following extracted text from long documents, {assessment_request} The learning objectives to be assessed are: \" +\n",
141
+ " \"{learning_objectives}. Although I may request more than one assessment question, you should \" +\n",
142
+ " \"only provide ONE question in you initial response. Do not include the answer in your response. \" +\n",
143
+ " \"If I get an answer wrong, provide me with an explanation of why it was incorrect, and then give me additional \" +\n",
144
+ " \"chances to respond until I get the correct choice. Explain why the correct choice is right. \" +\n",
145
+ " \"The extracted text from long documents are as follows: {summaries}.\")\n",
146
+ "\n",
147
+ "def create_base_tutoring_prompt(system_prompt=None, human_prompt=None):\n",
148
+ "\n",
149
+ " #setup defaults using defined values\n",
150
+ " if system_prompt == None:\n",
151
+ " system_prompt = PromptTemplate(template = SYSTEM_TUTOR_TEMPLATE,\n",
152
+ " input_variables = [])\n",
153
+ " \n",
154
+ " if human_prompt==None:\n",
155
+ " human_prompt = PromptTemplate(template = HUMAN_RESPONSE_TEMPLATE,\n",
156
+ " input_variables=['assessment_request', 'learning_objectives', 'context'])\n",
157
+ "\n",
158
+ " # Create prompt messages\n",
159
+ " system_tutor_msg = SystemMessagePromptTemplate(prompt=system_prompt)\n",
160
+ " human_tutor_msg = HumanMessagePromptTemplate(prompt= human_prompt)\n",
161
+ "\n",
162
+ " # Create ChatPromptTemplate\n",
163
+ " chat_prompt = ChatPromptTemplate.from_messages([system_tutor_msg, human_tutor_msg])\n",
164
+ "\n",
165
+ " return chat_prompt"
166
+ ]
167
+ },
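As a quick illustration (a sketch, not part of the exported module), a custom human prompt can be swapped in as long as it exposes the same input variables the rest of the module expects; the template text below is invented for the example.

```python
# Sketch: keep the default system prompt but swap in a custom human prompt.
# CUSTOM_HUMAN_TEMPLATE is a made-up example template, not part of the module.
CUSTOM_HUMAN_TEMPLATE = (
    "Act as my study partner for the text below. {assessment_request} "
    "Focus on these learning objectives: {learning_objectives}. "
    "The text is: {context}"
)

custom_human_prompt = PromptTemplate(
    template=CUSTOM_HUMAN_TEMPLATE,
    input_variables=["assessment_request", "learning_objectives", "context"],
)

custom_chat_prompt = create_base_tutoring_prompt(human_prompt=custom_human_prompt)
assert custom_chat_prompt.messages[1].prompt.template == CUSTOM_HUMAN_TEMPLATE
```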
168
+ {
169
+ "cell_type": "markdown",
170
+ "metadata": {},
171
+ "source": [
172
+ "Now for a quick unit test for testing..."
173
+ ]
174
+ },
175
+ {
176
+ "cell_type": "code",
177
+ "execution_count": null,
178
+ "metadata": {},
179
+ "outputs": [],
180
+ "source": [
181
+ "chat_prompt = create_base_tutoring_prompt()\n",
182
+ "assert chat_prompt.messages[0].prompt.template == SYSTEM_TUTOR_TEMPLATE, \"Did not set up the first chat_prompt to be SystemMessage\"\n",
183
+ "assert chat_prompt.messages[1].prompt.template == HUMAN_RESPONSE_TEMPLATE, \"Did not set up the second element of chat_prompt to be HumanMessage\""
184
+ ]
185
+ },
186
+ {
187
+ "cell_type": "markdown",
188
+ "metadata": {},
189
+ "source": [
190
+ "Now, let's define a function that allows us to set up default variables in case the user chooses not to pass something in."
191
+ ]
192
+ },
193
+ {
194
+ "cell_type": "code",
195
+ "execution_count": null,
196
+ "metadata": {},
197
+ "outputs": [],
198
+ "source": [
199
+ "#| export\n",
200
+ "DEFAULT_ASSESSMENT_MSG = 'Please design a 5 question short answer quiz about the provided text.'\n",
201
+ "DEFAULT_LEARNING_OBJS_MSG = 'Identify and comprehend the important topics and underlying messages and connections within the text'\n",
202
+ "\n",
203
+ "def get_tutoring_prompt(context, chat_template=None, assessment_request = None, learning_objectives = None, **kwargs):\n",
204
+ "\n",
205
+ " # set defaults\n",
206
+ " if chat_template is None:\n",
207
+ " chat_template = create_base_tutoring_prompt()\n",
208
+ " else:\n",
209
+ " if not all([prompt_var in chat_template.input_variables\n",
210
+ " for prompt_var in ['context', 'assessment_request', 'learning_objectives']]):\n",
211
+ " raise KeyError('''It looks like you may have a custom chat_template. Either include context, assessment_request, and learning objectives\n",
212
+ " as input variables or create your own tutoring prompt.''')\n",
213
+ "\n",
214
+ " if assessment_request is None and 'assessment_request':\n",
215
+ " assessment_request = DEFAULT_ASSESSMENT_MSG\n",
216
+ " \n",
217
+ " if learning_objectives is None:\n",
218
+ " learning_objectives = DEFAULT_LEARNING_OBJS_MSG\n",
219
+ " \n",
220
+ " # compose final prompt\n",
221
+ " tutoring_prompt = chat_template.format_prompt(context=context,\n",
222
+ " assessment_request = assessment_request,\n",
223
+ " learning_objectives = learning_objectives,\n",
224
+ " **kwargs)\n",
225
+ " \n",
226
+ " return tutoring_prompt\n"
227
+ ]
228
+ },
229
+ {
230
+ "cell_type": "markdown",
231
+ "metadata": {},
232
+ "source": [
233
+ "**Another quick unit test...**"
234
+ ]
235
+ },
236
+ {
237
+ "cell_type": "code",
238
+ "execution_count": null,
239
+ "metadata": {},
240
+ "outputs": [
241
+ {
242
+ "data": {
243
+ "text/plain": [
244
+ "[SystemMessage(content=\"You are a world-class tutor helping students to perform better on oral and written exams though interactive experiences.\\nWhen assessing and evaluating students, you always ask one question at a time, and wait for the student's response before providing them with feedback.\\nAsking one question at a time, waiting for the student's response, and then commenting on the strengths and weaknesses of their responses (when appropriate)\\nis what makes you such a sought-after, world-class tutor.\", additional_kwargs={}),\n",
245
+ " HumanMessage(content=\"I'm trying to better understand the text provided below. Please design a 5 question short answer quiz about the provided text. The learning objectives to be assessed are:\\nIdentify and comprehend the important topics and underlying messages and connections within the text. Although I may request more than one assessment question, you should\\nonly provide ONE question in you initial response. Do not include the answer in your response.\\nIf I get an answer wrong, provide me with an explanation of why it was incorrect, and then give me additional\\nchances to respond until I get the correct choice. Explain why the correct choice is right.\\nThe text that you will base your questions on is as follows: The dog was super pretty and cute.\", additional_kwargs={}, example=False)]"
246
+ ]
247
+ },
248
+ "execution_count": null,
249
+ "metadata": {},
250
+ "output_type": "execute_result"
251
+ }
252
+ ],
253
+ "source": [
254
+ "# For defaults\n",
255
+ "res = get_tutoring_prompt('The dog was super pretty and cute').to_messages()\n",
256
+ "res"
257
+ ]
258
+ },
259
+ {
260
+ "cell_type": "markdown",
261
+ "metadata": {},
262
+ "source": [
263
+ "Now, let's finally define how we can get the chat response from the model."
264
+ ]
265
+ },
266
+ {
267
+ "cell_type": "code",
268
+ "execution_count": null,
269
+ "metadata": {},
270
+ "outputs": [],
271
+ "source": [
272
+ "#| export\n",
273
+ "def get_tutoring_answer(context, tutor_mdl, chat_template=None, assessment_request=None, learning_objectives=None, return_dict=False, call_kwargs={}, input_kwargs={}):\n",
274
+ " \n",
275
+ " # Get answer from chat\n",
276
+ " \n",
277
+ " # set defaults\n",
278
+ " if assessment_request is None:\n",
279
+ " assessment_request = DEFAULT_ASSESSMENT_MSG\n",
280
+ " if learning_objectives is None:\n",
281
+ " learning_objectives = DEFAULT_LEARNING_OBJS_MSG\n",
282
+ " \n",
283
+ " common_inputs = {'assessment_request':assessment_request, 'learning_objectives':learning_objectives}\n",
284
+ " \n",
285
+ " # get answer based on interaction type\n",
286
+ " if isinstance(tutor_mdl, ChatOpenAI):\n",
287
+ " human_ask_prompt = get_tutoring_prompt(context, chat_template, assessment_request, learning_objectives)\n",
288
+ " tutor_answer = tutor_mdl(human_ask_prompt.to_messages())\n",
289
+ "\n",
290
+ " if not return_dict:\n",
291
+ " final_answer = tutor_answer.content\n",
292
+ " \n",
293
+ " elif isinstance(tutor_mdl, Chain):\n",
294
+ " if isinstance(tutor_mdl, RetrievalQAWithSourcesChain):\n",
295
+ " if 'question' not in input_kwargs.keys():\n",
296
+ " common_inputs['question'] = assessment_request\n",
297
+ " final_inputs = {**common_inputs, **input_kwargs}\n",
298
+ " else:\n",
299
+ " common_inputs['context'] = context\n",
300
+ " final_inputs = {**common_inputs, **input_kwargs}\n",
301
+ " \n",
302
+ " # get answer\n",
303
+ " tutor_answer = tutor_mdl(final_inputs, **call_kwargs)\n",
304
+ " final_answer = tutor_answer\n",
305
+ "\n",
306
+ " if not return_dict:\n",
307
+ " final_answer = final_answer['answer']\n",
308
+ " \n",
309
+ " else:\n",
310
+ " raise NotImplementedError(f\"tutor_mdl of type {type(tutor_mdl)} is not supported.\")\n",
311
+ "\n",
312
+ " return final_answer"
313
+ ]
314
+ },
315
+ {
316
+ "cell_type": "code",
317
+ "execution_count": null,
318
+ "metadata": {},
319
+ "outputs": [],
320
+ "source": [
321
+ "#| export\n",
322
+ "\n",
323
+ "DEFAULT_CONDENSE_PROMPT_TEMPLATE = (\"Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, \" + \n",
324
+ " \"in its original language.\\n\\nChat History:\\n{chat_history}\\nFollow Up Input: {question}\\nStandalone question:\")\n",
325
+ "\n",
326
+ "DEFAULT_QUESTION_PROMPT_TEMPLATE = (\"Use the following portion of a long document to see if any of the text is relevant to creating a response to the question.\" +\n",
327
+ " \"\\nReturn any relevant text verbatim.\\n{context}\\nQuestion: {question}\\nRelevant text, if any:\")\n",
328
+ "\n",
329
+ "DEFAULT_COMBINE_PROMPT_TEMPLATE = (\"Given the following extracted parts of a long document and the given prompt, create a final answer with references ('SOURCES'). \"+\n",
330
+ " \"If you don't have a response, just say that you are unable to come up with a response. \"+\n",
331
+ " \"\\nSOURCES:\\n\\nQUESTION: {question}\\n=========\\n{summaries}\\n=========\\nFINAL ANSWER:'\")\n",
332
+ "\n",
333
+ "def create_tutor_mdl_chain(kind='llm', mdl=None, prompt_template = None, **kwargs):\n",
334
+ " \n",
335
+ " #Validate parameters\n",
336
+ " if mdl is None:\n",
337
+ " mdl = create_model()\n",
338
+ " kind = kind.lower()\n",
339
+ " \n",
340
+ " #Create model chain\n",
341
+ " if kind == 'llm':\n",
342
+ " if prompt_template is None:\n",
343
+ " prompt_template = create_base_tutoring_prompt()\n",
344
+ " mdl_chain = LLMChain(llm=mdl, prompt=prompt_template, **kwargs)\n",
345
+ " elif kind == 'conversational':\n",
346
+ " if prompt_template is None:\n",
347
+ " prompt_template = PromptTemplate.from_template(DEFAULT_CONDENSE_PROMPT_TEMPLATE)\n",
348
+ " mdl_chain = ConversationalRetrieverChain.from_llm(mdl, condense_question_prompt = prompt_template, **kwargs)\n",
349
+ " elif kind == 'retrieval_qa':\n",
350
+ " if prompt_template is None:\n",
351
+ "\n",
352
+ " #Create custom human prompt to take in summaries\n",
353
+ " human_prompt = PromptTemplate(template = HUMAN_RETRIEVER_RESPONSE_TEMPLATE,\n",
354
+ " input_variables=['assessment_request', 'learning_objectives', 'summaries'])\n",
355
+ " prompt_template = create_base_tutoring_prompt(human_prompt=human_prompt)\n",
356
+ " \n",
357
+ " #Create the combination prompt and model\n",
358
+ " question_template = PromptTemplate.from_template(DEFAULT_QUESTION_PROMPT_TEMPLATE)\n",
359
+ " mdl_chain = RetrievalQAWithSourcesChain.from_llm(llm=mdl, question_prompt=question_template, combine_prompt = prompt_template, **kwargs)\n",
360
+ " else:\n",
361
+ " raise NotImplementedError(f\"Model kind {kind} not implemented\")\n",
362
+ " \n",
363
+ " return mdl_chain"
364
+ ]
365
+ },
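The `retrieval_qa` branch is not exercised in this notebook, so here is a hedged sketch of how it could be wired to a small in-memory Chroma store. The toy texts are placeholders, and whether the extra prompt variables reach the combine step depends on your LangChain version.

```python
# Sketch only: connect create_tutor_mdl_chain('retrieval_qa', ...) to a toy vector store.
# Assumes the chromadb backend and OpenAI embeddings are available; replace the toy texts
# and retriever with your own documents.
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings

toy_texts = [
    "The mitochondria is the powerhouse of the cell.",
    "Photosynthesis converts light energy into chemical energy.",
]
vector_store = Chroma.from_texts(toy_texts, OpenAIEmbeddings())

retrieval_chain = create_tutor_mdl_chain(
    kind="retrieval_qa",
    retriever=vector_store.as_retriever(),
)

# get_tutoring_answer fills in the 'question' input for retrieval chains,
# so the context argument can be None here.
res = get_tutoring_answer(None, retrieval_chain, assessment_request=DEFAULT_ASSESSMENT_MSG)
print(res)
```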
366
+ {
367
+ "cell_type": "markdown",
368
+ "metadata": {},
369
+ "source": [
370
+ "**Another brief test of behavior of these functions**"
371
+ ]
372
+ },
373
+ {
374
+ "cell_type": "code",
375
+ "execution_count": null,
376
+ "metadata": {},
377
+ "outputs": [],
378
+ "source": [
379
+ "res = get_tutoring_answer('The dog is super cute', chat_mdl)\n",
380
+ "print(res)"
381
+ ]
382
+ },
383
+ {
384
+ "cell_type": "markdown",
385
+ "metadata": {},
386
+ "source": [
387
+ "### Validate LLM Chain"
388
+ ]
389
+ },
390
+ {
391
+ "cell_type": "code",
392
+ "execution_count": null,
393
+ "metadata": {},
394
+ "outputs": [],
395
+ "source": [
396
+ "# Try llm model chain, making sure we've set the API key\n",
397
+ "llm_chain_test = create_tutor_mdl_chain('llm')\n",
398
+ "res = llm_chain_test.run({'context':'some context', 'assessment_request':'some assessment', 'learning_objectives':'some prompt'})"
399
+ ]
400
+ },
401
+ {
402
+ "cell_type": "code",
403
+ "execution_count": null,
404
+ "metadata": {},
405
+ "outputs": [
406
+ {
407
+ "name": "stdout",
408
+ "output_type": "stream",
409
+ "text": [
410
+ "<class 'langchain.chains.llm.LLMChain'>\n"
411
+ ]
412
+ },
413
+ {
414
+ "data": {
415
+ "text/plain": [
416
+ "'Sure, I can help you with that. Please provide me with the specific text that you would like me to base my questions on.'"
417
+ ]
418
+ },
419
+ "execution_count": null,
420
+ "metadata": {},
421
+ "output_type": "execute_result"
422
+ }
423
+ ],
424
+ "source": [
425
+ "# Verify information about the cell above\n",
426
+ "print(type(llm_chain_test))\n",
427
+ "print(res)\n",
428
+ "\n",
429
+ "# unit tests\n",
430
+ "assert isinstance(llm_chain_test, LLMChain), 'the output of llm create_tutor_mdl_chain should be an instance of LLMChain'\n",
431
+ "assert isinstance(res, str), 'the output of running the llm chain should be of type string.'"
432
+ ]
433
+ },
434
+ {
435
+ "cell_type": "markdown",
436
+ "metadata": {},
437
+ "source": [
438
+ "Now, we'll try this with just the default function to run things..."
439
+ ]
440
+ },
441
+ {
442
+ "cell_type": "code",
443
+ "execution_count": null,
444
+ "metadata": {},
445
+ "outputs": [
446
+ {
447
+ "data": {
448
+ "text/plain": [
449
+ "{'context': 'some context',\n",
450
+ " 'assessment_request': 'Please design a 5 question short answer quiz about the provided text.',\n",
451
+ " 'learning_objectives': 'Identify and comprehend the important topics and underlying messages and connections within the text',\n",
452
+ " 'text': 'Question 1: What are the main topics discussed in the text?\\n\\n(Note: Please provide your answer and I will provide feedback accordingly.)'}"
453
+ ]
454
+ },
455
+ "execution_count": null,
456
+ "metadata": {},
457
+ "output_type": "execute_result"
458
+ }
459
+ ],
460
+ "source": [
461
+ "res = get_tutoring_answer(context='some context', tutor_mdl = llm_chain_test)\n",
462
+ "res"
463
+ ]
464
+ },
465
+ {
466
+ "cell_type": "markdown",
467
+ "metadata": {},
468
+ "source": [
469
+ "OK, this base functionality is looking good."
470
+ ]
471
+ }
472
+ ],
473
+ "metadata": {
474
+ "kernelspec": {
475
+ "display_name": "python3",
476
+ "language": "python",
477
+ "name": "python3"
478
+ }
479
+ },
480
+ "nbformat": 4,
481
+ "nbformat_minor": 2
482
+ }
lo-achievement/nbs/self_study_prompts.ipynb ADDED
@@ -0,0 +1,342 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# self_study_prompts.ipynb\n",
8
+ "> A listing of all prompts for self-study\n",
9
+ "\n",
10
+ "This notebook contains all prompts used for self-study as a central place that can be monitored and evaluated for appropriate functionality. Note that these perform the requests part of the prompts.\n",
11
+ "\n",
12
+ ":::{.callout-caution}\n",
13
+ "These notebooks are development notebooks, meaning that they are meant to be run locally or somewhere that supports navigating a full repository (in other words, not Google Colab unless you clone the entire repository to drive and then mount the Drive-Repository.) However, it is expected if you're able to do all of those steps, you're likely also able to figure out the required pip installs for development there.\n",
14
+ ":::"
15
+ ]
16
+ },
17
+ {
18
+ "cell_type": "raw",
19
+ "metadata": {},
20
+ "source": [
21
+ "---\n",
22
+ "skip_exec: true\n",
23
+ "---"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "execution_count": null,
29
+ "metadata": {},
30
+ "outputs": [],
31
+ "source": [
32
+ "#| default_exp SelfStudyPrompts"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "markdown",
37
+ "metadata": {},
38
+ "source": [
39
+ "## Self-study texts\n",
40
+ "We'll now define the text for our self-study questions. Note that these will align with `assessment_request` in the `PromptInteractionBase` module."
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "execution_count": null,
46
+ "metadata": {},
47
+ "outputs": [],
48
+ "source": [
49
+ "#| export\n",
50
+ "# used for pretty display\n",
51
+ "import pandas as pd"
52
+ ]
53
+ },
54
+ {
55
+ "cell_type": "code",
56
+ "execution_count": null,
57
+ "metadata": {},
58
+ "outputs": [],
59
+ "source": [
60
+ "#| export\n",
61
+ "MC_QUIZ_DEFAULT = \"Please design a 5 question multiple choice quiz about the provided text.\"\n",
62
+ "\n",
63
+ "SHORT_ANSWER_DEFAULT = (\"Please design a 5 question short answer quiz about the provided text. \"\n",
64
+ " \"The question types should be short answer. Expect the correct answers to be a few sentences long.\")\n",
65
+ "\n",
66
+ "FILL_BLANK_DEFAULT = \"\"\"Create a 5 question fill in the blank quiz referencing parts of the provided text.\n",
67
+ "The \"blank\" part of the question should appear as \"________\". The answers should reflect what word(s) should go in the blank an accurate statement.\n",
68
+ "An example is as follows: \"The author of the book is ______.\" The question should be a statement.\n",
69
+ "\"\"\"\n",
70
+ "\n",
71
+ "SEQUENCING_DEFAULT = \"\"\"Create a 5 question questionnaire that will ask me to recall the steps or sequence of events\n",
72
+ "in the provided text.\"\"\"\n",
73
+ "\n",
74
+ "RELATIONSHIP_DEFAULT = (\"Create a 5 question quiz for the student that asks the student to identify relationships between\"\n",
75
+ " \"topics or concepts that are important to understanding this text.\")\n",
76
+ "\n",
77
+ "CONCEPTS_DEFAULT = \"\"\" Design a 5 question quiz that asks me about definitions or concepts of importance in the provided text.\"\"\"\n",
78
+ "\n",
79
+ "REAL_WORLD_EXAMPLE_DEFAULT = \"\"\"Demonstrate how the provided context can be applied to solve a real world problem.\n",
80
+ "Ask me questions about how the demonstration you provided relates to solving a real world problem.\"\"\"\n",
81
+ "\n",
82
+ "RANDOMIZED_QUESTIONS_DEFAULT = \"\"\"Generate a high-quality assessment consisting of 5 varied questions,\n",
83
+ "each of different types (open-ended, multiple choice, short answer, analogies, etc.)\"\"\"\n",
84
+ "\n",
85
+ "SELF_STUDY_PROMPT_NAMES = ['MC_QUIZ_DEFAULT',\n",
86
+ "'SHORT_ANSWER_DEFAULT',\n",
87
+ "'FILL_BLANK_DEFAULT',\n",
88
+ "'SEQUENCING_DEFAULT',\n",
89
+ "'RELATIONSHIP_DEFAULT',\n",
90
+ "'CONCEPTS_DEFAULT',\n",
91
+ "'REAL_WORLD_EXAMPLE_DEFAULT',\n",
92
+ "'RANDOMIZED_QUESTIONS_DEFAULT']"
93
+ ]
94
+ },
95
+ {
96
+ "cell_type": "markdown",
97
+ "metadata": {},
98
+ "source": [
99
+ "## Create functions to assist with creating prompts\n",
100
+ "Now, we'll use this section in order to create some functions which will allow the user to display all available prompts."
101
+ ]
102
+ },
103
+ {
104
+ "cell_type": "code",
105
+ "execution_count": null,
106
+ "metadata": {},
107
+ "outputs": [],
108
+ "source": [
109
+ "#| export\n",
110
+ "# Define self study dictionary for lookup\n",
111
+ "SELF_STUDY_DEFAULTS = {'mc': MC_QUIZ_DEFAULT,\n",
112
+ "'short_answer': SHORT_ANSWER_DEFAULT,\n",
113
+ "'fill_blank': FILL_BLANK_DEFAULT,\n",
114
+ "'sequencing': SEQUENCING_DEFAULT,\n",
115
+ "'relationships': RELATIONSHIP_DEFAULT,\n",
116
+ "'concepts': CONCEPTS_DEFAULT,\n",
117
+ "'real_world_example': REAL_WORLD_EXAMPLE_DEFAULT,\n",
118
+ "'randomized_questions': RANDOMIZED_QUESTIONS_DEFAULT\n",
119
+ "} \n",
120
+ "\n",
121
+ "# Return list of all self study prompts\n",
122
+ "def list_all_self_study_prompt_keys():\n",
123
+ " return list(SELF_STUDY_DEFAULTS.keys())\n",
124
+ "\n",
125
+ "def list_all_self_study_prompts():\n",
126
+ " return list(SELF_STUDY_DEFAULTS.values())\n",
127
+ " \n",
128
+ "# Return list of all self study variable names\n",
129
+ "def list_default_self_prompt_varnames():\n",
130
+ " return SELF_STUDY_PROMPT_NAMES\n",
131
+ "\n",
132
+ "# Print as a table\n",
133
+ "def print_all_self_study_prompts():\n",
134
+ " with pd.option_context('max_colwidth', None):\n",
135
+ " display(pd.DataFrame({'SELF_STUDY_DEFAULTS key': list(SELF_STUDY_DEFAULTS.keys()),\n",
136
+ " 'Prompt': list(SELF_STUDY_DEFAULTS.values())}))\n"
137
+ ]
138
+ },
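To show how these defaults are meant to be consumed, here is a small sketch that pairs a `SELF_STUDY_DEFAULTS` lookup with the tutoring helpers from the `PromptInteractionBase` module defined earlier in this commit; it assumes your OpenAI key is already set and the package is importable.

```python
# Sketch: use a default self-study request with the tutoring helpers.
# Assumes OPENAI_API_KEY is already set and ai_classroom_suite is installed/importable.
from ai_classroom_suite.PromptInteractionBase import create_model, get_tutoring_answer

study_text = "Water expands when it freezes, which is why ice floats on liquid water."
request = SELF_STUDY_DEFAULTS["fill_blank"]

mdl = create_model()
first_question = get_tutoring_answer(study_text, mdl, assessment_request=request)
print(first_question)
```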
139
+ {
140
+ "cell_type": "markdown",
141
+ "metadata": {},
142
+ "source": [
143
+ "Now, we'll have quick unit test just to make sure this is working correctly."
144
+ ]
145
+ },
146
+ {
147
+ "cell_type": "code",
148
+ "execution_count": null,
149
+ "metadata": {},
150
+ "outputs": [
151
+ {
152
+ "data": {
153
+ "text/plain": [
154
+ "['mc',\n",
155
+ " 'short_answers',\n",
156
+ " 'fill_blanks',\n",
157
+ " 'sequencing',\n",
158
+ " 'relationships',\n",
159
+ " 'concepts',\n",
160
+ " 'real_world_example',\n",
161
+ " 'randomized_questions']"
162
+ ]
163
+ },
164
+ "execution_count": null,
165
+ "metadata": {},
166
+ "output_type": "execute_result"
167
+ }
168
+ ],
169
+ "source": [
170
+ "list_all_self_study_prompt_keys()"
171
+ ]
172
+ },
173
+ {
174
+ "cell_type": "code",
175
+ "execution_count": null,
176
+ "metadata": {},
177
+ "outputs": [
178
+ {
179
+ "data": {
180
+ "text/plain": [
181
+ "['Please design a 5 question quiz about the provided text.',\n",
182
+ " 'Please design a 5 question short answer quiz about the provided text. The question types should be short answer. Expect the correct answers to be a few sentences long.',\n",
183
+ " 'Create a 5 question fill in the blank quiz referencing parts of the provided text.\\nThe \"blank\" part of the question should appear as \"________\". The answers should reflect what word(s) should go in the blank an accurate statement.\\nAn example is as follows: \"The author of the book is ______.\" The question should be a statement.\\n',\n",
184
+ " 'Create a 5 question questionnaire that will ask me to recall the steps or sequence of events\\nin the provided text.',\n",
185
+ " 'Please design a 5 question quiz that asks me to draw or explain relationships\\nbetween important concepts or topics in the provided text.',\n",
186
+ " ' Design a 5 question quiz that asks me about definitions or concepts of importance in the provided text.',\n",
187
+ " 'Demonstrate how the provided context can be applied to solve a real world problem.\\nAsk me questions about how the demonstration you provided relates to solving a real world problem.',\n",
188
+ " 'Generate a high-quality assessment consisting of 5 varied questions,\\neach of different types (open-ended, multiple choice, short answer, analogies, etc.)']"
189
+ ]
190
+ },
191
+ "execution_count": null,
192
+ "metadata": {},
193
+ "output_type": "execute_result"
194
+ }
195
+ ],
196
+ "source": [
197
+ "list_all_self_study_prompts()"
198
+ ]
199
+ },
200
+ {
201
+ "cell_type": "code",
202
+ "execution_count": null,
203
+ "metadata": {},
204
+ "outputs": [
205
+ {
206
+ "data": {
207
+ "text/plain": [
208
+ "['MC_QUIZ_DEFAULT',\n",
209
+ " 'SHORT_ANSWER_DEFAULT',\n",
210
+ " 'FILL_BLANK_DEFAULT',\n",
211
+ " 'SEQUENCING_DEFAULT',\n",
212
+ " 'RELATIONSHIP_DEFAULT',\n",
213
+ " 'CONCEPTS_DEFAULT',\n",
214
+ " 'REAL_WORLD_EXAMPLE_DEFAULT',\n",
215
+ " 'RANDOMIZED_QUESTIONS_DEFAULT']"
216
+ ]
217
+ },
218
+ "execution_count": null,
219
+ "metadata": {},
220
+ "output_type": "execute_result"
221
+ }
222
+ ],
223
+ "source": [
224
+ "list_default_self_prompt_varnames()"
225
+ ]
226
+ },
227
+ {
228
+ "cell_type": "code",
229
+ "execution_count": null,
230
+ "metadata": {},
231
+ "outputs": [
232
+ {
233
+ "data": {
234
+ "text/html": [
235
+ "<div>\n",
236
+ "<style scoped>\n",
237
+ " .dataframe tbody tr th:only-of-type {\n",
238
+ " vertical-align: middle;\n",
239
+ " }\n",
240
+ "\n",
241
+ " .dataframe tbody tr th {\n",
242
+ " vertical-align: top;\n",
243
+ " }\n",
244
+ "\n",
245
+ " .dataframe thead th {\n",
246
+ " text-align: right;\n",
247
+ " }\n",
248
+ "</style>\n",
249
+ "<table border=\"1\" class=\"dataframe\">\n",
250
+ " <thead>\n",
251
+ " <tr style=\"text-align: right;\">\n",
252
+ " <th></th>\n",
253
+ " <th>Variable Name</th>\n",
254
+ " <th>Prompt</th>\n",
255
+ " </tr>\n",
256
+ " </thead>\n",
257
+ " <tbody>\n",
258
+ " <tr>\n",
259
+ " <th>0</th>\n",
260
+ " <td>mc</td>\n",
261
+ " <td>Please design a 5 question quiz about the provided text.</td>\n",
262
+ " </tr>\n",
263
+ " <tr>\n",
264
+ " <th>1</th>\n",
265
+ " <td>short_answers</td>\n",
266
+ " <td>Please design a 5 question short answer quiz about the provided text. The question types should be short answer. Expect the correct answers to be a few sentences long.</td>\n",
267
+ " </tr>\n",
268
+ " <tr>\n",
269
+ " <th>2</th>\n",
270
+ " <td>fill_blanks</td>\n",
271
+ " <td>Create a 5 question fill in the blank quiz referencing parts of the provided text.\\nThe \"blank\" part of the question should appear as \"________\". The answers should reflect what word(s) should go in the blank an accurate statement.\\nAn example is as follows: \"The author of the book is ______.\" The question should be a statement.\\n</td>\n",
272
+ " </tr>\n",
273
+ " <tr>\n",
274
+ " <th>3</th>\n",
275
+ " <td>sequencing</td>\n",
276
+ " <td>Create a 5 question questionnaire that will ask me to recall the steps or sequence of events\\nin the provided text.</td>\n",
277
+ " </tr>\n",
278
+ " <tr>\n",
279
+ " <th>4</th>\n",
280
+ " <td>relationships</td>\n",
281
+ " <td>Please design a 5 question quiz that asks me to draw or explain relationships\\nbetween important concepts or topics in the provided text.</td>\n",
282
+ " </tr>\n",
283
+ " <tr>\n",
284
+ " <th>5</th>\n",
285
+ " <td>concepts</td>\n",
286
+ " <td>Design a 5 question quiz that asks me about definitions or concepts of importance in the provided text.</td>\n",
287
+ " </tr>\n",
288
+ " <tr>\n",
289
+ " <th>6</th>\n",
290
+ " <td>real_world_example</td>\n",
291
+ " <td>Demonstrate how the provided context can be applied to solve a real world problem.\\nAsk me questions about how the demonstration you provided relates to solving a real world problem.</td>\n",
292
+ " </tr>\n",
293
+ " <tr>\n",
294
+ " <th>7</th>\n",
295
+ " <td>randomized_questions</td>\n",
296
+ " <td>Generate a high-quality assessment consisting of 5 varied questions,\\neach of different types (open-ended, multiple choice, short answer, analogies, etc.)</td>\n",
297
+ " </tr>\n",
298
+ " </tbody>\n",
299
+ "</table>\n",
300
+ "</div>"
301
+ ],
302
+ "text/plain": [
303
+ " Variable Name \\\n",
304
+ "0 mc \n",
305
+ "1 short_answers \n",
306
+ "2 fill_blanks \n",
307
+ "3 sequencing \n",
308
+ "4 relationships \n",
309
+ "5 concepts \n",
310
+ "6 real_world_example \n",
311
+ "7 randomized_questions \n",
312
+ "\n",
313
+ " Prompt \n",
314
+ "0 Please design a 5 question quiz about the provided text. \n",
315
+ "1 Please design a 5 question short answer quiz about the provided text. The question types should be short answer. Expect the correct answers to be a few sentences long. \n",
316
+ "2 Create a 5 question fill in the blank quiz referencing parts of the provided text.\\nThe \"blank\" part of the question should appear as \"________\". The answers should reflect what word(s) should go in the blank an accurate statement.\\nAn example is as follows: \"The author of the book is ______.\" The question should be a statement.\\n \n",
317
+ "3 Create a 5 question questionnaire that will ask me to recall the steps or sequence of events\\nin the provided text. \n",
318
+ "4 Please design a 5 question quiz that asks me to draw or explain relationships\\nbetween important concepts or topics in the provided text. \n",
319
+ "5 Design a 5 question quiz that asks me about definitions or concepts of importance in the provided text. \n",
320
+ "6 Demonstrate how the provided context can be applied to solve a real world problem.\\nAsk me questions about how the demonstration you provided relates to solving a real world problem. \n",
321
+ "7 Generate a high-quality assessment consisting of 5 varied questions,\\neach of different types (open-ended, multiple choice, short answer, analogies, etc.) "
322
+ ]
323
+ },
324
+ "metadata": {},
325
+ "output_type": "display_data"
326
+ }
327
+ ],
328
+ "source": [
329
+ "print_all_self_study_prompts()"
330
+ ]
331
+ }
332
+ ],
333
+ "metadata": {
334
+ "kernelspec": {
335
+ "display_name": "python3",
336
+ "language": "python",
337
+ "name": "python3"
338
+ }
339
+ },
340
+ "nbformat": 4,
341
+ "nbformat_minor": 2
342
+ }
lo-achievement/nbs/styles.css ADDED
@@ -0,0 +1,37 @@
1
+ .cell {
2
+ margin-bottom: 1rem;
3
+ }
4
+
5
+ .cell > .sourceCode {
6
+ margin-bottom: 0;
7
+ }
8
+
9
+ .cell-output > pre {
10
+ margin-bottom: 0;
11
+ }
12
+
13
+ .cell-output > pre, .cell-output > .sourceCode > pre, .cell-output-stdout > pre {
14
+ margin-left: 0.8rem;
15
+ margin-top: 0;
16
+ background: none;
17
+ border-left: 2px solid lightsalmon;
18
+ border-top-left-radius: 0;
19
+ border-top-right-radius: 0;
20
+ }
21
+
22
+ .cell-output > .sourceCode {
23
+ border: none;
24
+ }
25
+
26
+ .cell-output > .sourceCode {
27
+ background: none;
28
+ margin-top: 0;
29
+ }
30
+
31
+ div.description {
32
+ padding-left: 2px;
33
+ padding-top: 5px;
34
+ font-style: italic;
35
+ font-size: 135%;
36
+ opacity: 70%;
37
+ }
lo-achievement/prompt_with_context.ipynb ADDED
@@ -0,0 +1,796 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "<a href=\"https://colab.research.google.com/github/vanderbilt-data-science/lo-achievement/blob/main/prompt_with_context.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "markdown",
12
+ "metadata": {},
13
+ "source": [
14
+ "# LLMs for Self-Study\n",
15
+ "> A prompt and code template for better understanding texts\n",
16
+ "\n",
17
+ "This notebook provides a guide for using LLMs for self-study programmatically. A number of prompt templates are provided to assist with generating great assessments for self-study, and code is additionally provided for fast usage. This notebook is best leveraged for a set of documents (text or PDF preferred) **to be uploaded** for interaction with the model.\n",
18
+ "\n",
19
+ "This version of the notebook is best suited for those who prefer to use files from their local drive as context rather than copy and pasting directly into the notebook to be used as context for the model. If you prefer to copy and paste text, you should direct yourself to the [prompt_with_context](https://colab.research.google.com/github/vanderbilt-data-science/lo-achievement/blob/main/prompt_with_context.ipynb) notebook."
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "markdown",
24
+ "metadata": {},
25
+ "source": [
26
+ "# Code Setup\n",
27
+ "Run the following cells to setup the rest of the environment for prompting. In the following section, we set up the computational environment with imported code, setup your API key access to OpenAI, and loading access to your language model. Note that the following cells may take a long time to run."
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "markdown",
32
+ "metadata": {},
33
+ "source": [
34
+ "## Library installation and loading\n",
35
+ "The following `pip install` code should be run if you're using Google Colab, or otherwise do not have a computational environment (e.g., _venv_, _conda virtual environment_, _Docker, Singularity, or other container_) with these packages installed."
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "raw",
40
+ "metadata": {},
41
+ "source": [
42
+ "---\n",
43
+ "skip_exec: true\n",
44
+ "---"
45
+ ]
46
+ },
47
+ {
48
+ "cell_type": "code",
49
+ "execution_count": null,
50
+ "metadata": {},
51
+ "outputs": [],
52
+ "source": [
53
+ "# run this code if you're using Google Colab or don't have these packages installed in your computing environment\n",
54
+ "! pip install pip install git+https://<token>@github.com/vanderbilt-data-science/lo-achievement.git"
55
+ ]
56
+ },
57
+ {
58
+ "cell_type": "code",
59
+ "execution_count": null,
60
+ "metadata": {},
61
+ "outputs": [],
62
+ "source": [
63
+ "# import required libraries\n",
64
+ "import numpy as np\n",
65
+ "import getpass\n",
66
+ "import os\n",
67
+ "from langchain.chat_models import ChatOpenAI\n",
68
+ "from langchain.chains import RetrievalQA\n",
69
+ "from langchain.schema import SystemMessage, HumanMessage, AIMessage"
70
+ ]
71
+ },
72
+ {
73
+ "cell_type": "code",
74
+ "execution_count": null,
75
+ "metadata": {},
76
+ "outputs": [],
77
+ "source": [
78
+ "# libraries from our package\n",
79
+ "from ai_classroom_suite.PromptInteractionBase import *\n",
80
+ "import ai_classroom_suite.SelfStudyPrompts as ssp"
81
+ ]
82
+ },
83
+ {
84
+ "cell_type": "markdown",
85
+ "metadata": {},
86
+ "source": [
87
+ "## API and model setup\n",
88
+ "\n",
89
+ "Use these cells to load the API keys required for this notebook and create a basic OpenAI LLM model. The code below uses the variable you created above when you input your API Key."
90
+ ]
91
+ },
92
+ {
93
+ "cell_type": "code",
94
+ "execution_count": null,
95
+ "metadata": {},
96
+ "outputs": [],
97
+ "source": [
98
+ "# Set up OpenAI API Key\n",
99
+ "set_openai_key()\n",
100
+ "\n",
101
+ "# Create model\n",
102
+ "mdl = create_model()"
103
+ ]
104
+ },
105
+ {
106
+ "cell_type": "markdown",
107
+ "metadata": {},
108
+ "source": [
109
+ "# Inspect Available Default Prompts\n",
110
+ "A number of default prompts have been provided for you so you don't need to form your own prompt to begin with. They will be listed below, and the different ways to interact with them are displayed."
111
+ ]
112
+ },
113
+ {
114
+ "cell_type": "code",
115
+ "execution_count": null,
116
+ "metadata": {},
117
+ "outputs": [
118
+ {
119
+ "data": {
120
+ "text/html": [
121
+ "<div>\n",
122
+ "<style scoped>\n",
123
+ " .dataframe tbody tr th:only-of-type {\n",
124
+ " vertical-align: middle;\n",
125
+ " }\n",
126
+ "\n",
127
+ " .dataframe tbody tr th {\n",
128
+ " vertical-align: top;\n",
129
+ " }\n",
130
+ "\n",
131
+ " .dataframe thead th {\n",
132
+ " text-align: right;\n",
133
+ " }\n",
134
+ "</style>\n",
135
+ "<table border=\"1\" class=\"dataframe\">\n",
136
+ " <thead>\n",
137
+ " <tr style=\"text-align: right;\">\n",
138
+ " <th></th>\n",
139
+ " <th>SELF_STUDY_DEFAULTS key</th>\n",
140
+ " <th>Prompt</th>\n",
141
+ " </tr>\n",
142
+ " </thead>\n",
143
+ " <tbody>\n",
144
+ " <tr>\n",
145
+ " <th>0</th>\n",
146
+ " <td>mc</td>\n",
147
+ " <td>Please design a 5 question multiple choice quiz about the provided text.</td>\n",
148
+ " </tr>\n",
149
+ " <tr>\n",
150
+ " <th>1</th>\n",
151
+ " <td>short_answer</td>\n",
152
+ " <td>Please design a 5 question short answer quiz about the provided text. The question types should be short answer. Expect the correct answers to be a few sentences long.</td>\n",
153
+ " </tr>\n",
154
+ " <tr>\n",
155
+ " <th>2</th>\n",
156
+ " <td>fill_blank</td>\n",
157
+ " <td>Create a 5 question fill in the blank quiz referencing parts of the provided text.\\nThe \"blank\" part of the question should appear as \"________\". The answers should reflect what word(s) should go in the blank an accurate statement.\\nAn example is as follows: \"The author of the book is ______.\" The question should be a statement.\\n</td>\n",
158
+ " </tr>\n",
159
+ " <tr>\n",
160
+ " <th>3</th>\n",
161
+ " <td>sequencing</td>\n",
162
+ " <td>Create a 5 question questionnaire that will ask me to recall the steps or sequence of events\\nin the provided text.</td>\n",
163
+ " </tr>\n",
164
+ " <tr>\n",
165
+ " <th>4</th>\n",
166
+ " <td>relationships</td>\n",
167
+ " <td>Create a 5 question quiz for the student that asks the student to identify relationships betweentopics or concepts that are important to understanding this text.</td>\n",
168
+ " </tr>\n",
169
+ " <tr>\n",
170
+ " <th>5</th>\n",
171
+ " <td>concepts</td>\n",
172
+ " <td>Design a 5 question quiz that asks me about definitions or concepts of importance in the provided text.</td>\n",
173
+ " </tr>\n",
174
+ " <tr>\n",
175
+ " <th>6</th>\n",
176
+ " <td>real_world_example</td>\n",
177
+ " <td>Demonstrate how the provided context can be applied to solve a real world problem.\\nAsk me questions about how the demonstration you provided relates to solving a real world problem.</td>\n",
178
+ " </tr>\n",
179
+ " <tr>\n",
180
+ " <th>7</th>\n",
181
+ " <td>randomized_questions</td>\n",
182
+ " <td>Generate a high-quality assessment consisting of 5 varied questions,\\neach of different types (open-ended, multiple choice, short answer, analogies, etc.)</td>\n",
183
+ " </tr>\n",
184
+ " </tbody>\n",
185
+ "</table>\n",
186
+ "</div>"
187
+ ],
188
+ "text/plain": [
189
+ " SELF_STUDY_DEFAULTS key \\\n",
190
+ "0 mc \n",
191
+ "1 short_answer \n",
192
+ "2 fill_blank \n",
193
+ "3 sequencing \n",
194
+ "4 relationships \n",
195
+ "5 concepts \n",
196
+ "6 real_world_example \n",
197
+ "7 randomized_questions \n",
198
+ "\n",
199
+ " Prompt \n",
200
+ "0 Please design a 5 question multiple choice quiz about the provided text. \n",
201
+ "1 Please design a 5 question short answer quiz about the provided text. The question types should be short answer. Expect the correct answers to be a few sentences long. \n",
202
+ "2 Create a 5 question fill in the blank quiz referencing parts of the provided text.\\nThe \"blank\" part of the question should appear as \"________\". The answers should reflect what word(s) should go in the blank an accurate statement.\\nAn example is as follows: \"The author of the book is ______.\" The question should be a statement.\\n \n",
203
+ "3 Create a 5 question questionnaire that will ask me to recall the steps or sequence of events\\nin the provided text. \n",
204
+ "4 Create a 5 question quiz for the student that asks the student to identify relationships betweentopics or concepts that are important to understanding this text. \n",
205
+ "5 Design a 5 question quiz that asks me about definitions or concepts of importance in the provided text. \n",
206
+ "6 Demonstrate how the provided context can be applied to solve a real world problem.\\nAsk me questions about how the demonstration you provided relates to solving a real world problem. \n",
207
+ "7 Generate a high-quality assessment consisting of 5 varied questions,\\neach of different types (open-ended, multiple choice, short answer, analogies, etc.) "
208
+ ]
209
+ },
210
+ "metadata": {},
211
+ "output_type": "display_data"
212
+ }
213
+ ],
214
+ "source": [
215
+ "# show all prompts and names\n",
216
+ "ssp.print_all_self_study_prompts()"
217
+ ]
218
+ },
219
+ {
220
+ "cell_type": "code",
221
+ "execution_count": null,
222
+ "metadata": {},
223
+ "outputs": [
224
+ {
225
+ "data": {
226
+ "text/plain": [
227
+ "'Please design a 5 question multiple choice quiz about the provided text.'"
228
+ ]
229
+ },
230
+ "execution_count": null,
231
+ "metadata": {},
232
+ "output_type": "execute_result"
233
+ }
234
+ ],
235
+ "source": [
236
+ "# accessing texts of desired assessment types\n",
237
+ "ssp.SELF_STUDY_DEFAULTS['mc']"
238
+ ]
239
+ },
240
+ {
241
+ "cell_type": "markdown",
242
+ "metadata": {},
243
+ "source": [
244
+ "# Add your context and assign the prefix to your query.\n",
245
+ "The query assigned here serves as an example."
246
+ ]
247
+ },
248
+ {
249
+ "cell_type": "code",
250
+ "execution_count": null,
251
+ "metadata": {},
252
+ "outputs": [],
253
+ "source": [
254
+ "context = \"\"\" Two roads diverged in a yellow wood,\n",
255
+ "And sorry I could not travel both\n",
256
+ "And be one traveler, long I stood\n",
257
+ "And looked down one as far as I could\n",
258
+ "To where it bent in the undergrowth;\n",
259
+ "Then took the other, as just as fair,\n",
260
+ "And having perhaps the better claim,\n",
261
+ "Because it was grassy and wanted wear;\n",
262
+ "Though as for that the passing there\n",
263
+ "Had worn them really about the same,\n",
264
+ "And both that morning equally lay\n",
265
+ "In leaves no step had trodden black.\n",
266
+ "Oh, I kept the first for another day!\n",
267
+ "Yet knowing how way leads on to way,\n",
268
+ "I doubted if I should ever come back.\n",
269
+ "I shall be telling this with a sigh\n",
270
+ "Somewhere ages and ages hence:\n",
271
+ "Two roads diverged in a wood, and I—\n",
272
+ "I took the one less traveled by,\n",
273
+ "And that has made all the difference.\n",
274
+ "—-Robert Frost—-\n",
275
+ "Education Place: http://www.eduplace.com \"\"\""
276
+ ]
277
+ },
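If your text lives in a local file rather than your clipboard, a minimal sketch like the one below reads it into `context` (the filename is a placeholder); for long documents or whole folders of files, the vector-store notebook mentioned above is the better fit.

```python
# Sketch: read the study text from a local file instead of pasting it.
# "my_reading.txt" is a placeholder path; substitute your own file.
from pathlib import Path

context = Path("my_reading.txt").read_text(encoding="utf-8")
print(context[:200])  # quick sanity check of what was loaded
```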
278
+ {
279
+ "cell_type": "markdown",
280
+ "metadata": {},
281
+ "source": [
282
+ "# A guide to prompting for self-study\n",
283
+ "In this section, we provide a number of different approaches for using AI to help you assess and explain the knowledge of your document. Start by interacting with the model and then try out the rest of the prompts!"
284
+ ]
285
+ },
286
+ {
287
+ "cell_type": "markdown",
288
+ "metadata": {},
289
+ "source": [
290
+ "## Interact with the model\n",
291
+ "\n",
292
+ "Now that your context is created, you can begin interacting with the model! Below, we have a comprehensive list of examples using different question types, but feel free to use this code block to experiment with the model.\n",
293
+ "\n",
294
+ "First, let's make the settings for the query. In other words, what are the learning objectives and what is the type of assessment we want to have?"
295
+ ]
296
+ },
297
+ {
298
+ "cell_type": "code",
299
+ "execution_count": null,
300
+ "metadata": {},
301
+ "outputs": [],
302
+ "source": [
303
+ "# set short answer as the assessment type\n",
304
+ "assessment_type = ssp.SELF_STUDY_DEFAULTS[\"short_answer\"]\n",
305
+ "\n",
306
+ "# set learning objectives if desired\n",
307
+ "learning_objs = (\"\"\"1. Identify the key elements of the poem: narrator, setting, and underlying message.\n",
308
+ " 2. Understand the literary devices used in poetry and their purposes.\"\"\")"
309
+ ]
310
+ },
311
+ {
312
+ "cell_type": "markdown",
313
+ "metadata": {},
314
+ "source": [
315
+ "Next, let's use the predefined defaults with the model and provided APIs from `ai_classroom_suite`."
316
+ ]
317
+ },
318
+ {
319
+ "cell_type": "code",
320
+ "execution_count": null,
321
+ "metadata": {},
322
+ "outputs": [
323
+ {
324
+ "data": {
325
+ "text/plain": [
326
+ "'Question 1: Who is the narrator of the poem and what is the setting?\\n\\nPlease provide your answer in a few sentences.'"
327
+ ]
328
+ },
329
+ "execution_count": null,
330
+ "metadata": {},
331
+ "output_type": "execute_result"
332
+ }
333
+ ],
334
+ "source": [
335
+ "# Ask the tutor to prompt you based on the text\n",
336
+ "get_tutoring_answer(context, mdl, assessment_request = assessment_type,\n",
337
+ " learning_objectives = learning_objs)"
338
+ ]
339
+ },
340
+ {
341
+ "cell_type": "markdown",
342
+ "metadata": {},
343
+ "source": [
344
+ "The complete prompt sent is based on pre-generated information and can be seen below. Your context, assessment_type, and learning objectives are substituted to create a full prompt as shown below."
345
+ ]
346
+ },
347
+ {
348
+ "cell_type": "code",
349
+ "execution_count": null,
350
+ "metadata": {},
351
+ "outputs": [
352
+ {
353
+ "name": "stdout",
354
+ "output_type": "stream",
355
+ "text": [
356
+ "System: You are a world-class tutor helping students to perform better on oral and written exams though interactive experiences. When assessing and evaluating students, you always ask one question at a time, and wait for the student's response before providing them with feedback. Asking one question at a time, waiting for the student's response, and then commenting on the strengths and weaknesses of their responses (when appropriate) is what makes you such a sought-after, world-class tutor.\n",
357
+ "Human: I'm trying to better understand the text provided below. Please design a 5 question short answer quiz about the provided text. The question types should be short answer. Expect the correct answers to be a few sentences long. The learning objectives to be assessed are: 1. Identify the key elements of the poem: narrator, setting, and underlying message.\n",
358
+ " 2. Understand the literary devices used in poetry and their purposes.. Although I may request more than one assessment question, you should only provide ONE question in you initial response. Do not include the answer in your response. If I get an answer wrong, provide me with an explanation of why it was incorrect, and then give me additional chances to respond until I get the correct choice. Explain why the correct choice is right. The text that you will base your questions on is as follows: Two roads diverged in a yellow wood,\n",
359
+ "And sorry I could not travel both\n",
360
+ "And be one traveler, long I stood\n",
361
+ "And looked down one as far as I could\n",
362
+ "To where it bent in the undergrowth;\n",
363
+ "Then took the other, as just as fair,\n",
364
+ "And having perhaps the better claim,\n",
365
+ "Because it was grassy and wanted wear;\n",
366
+ "Though as for that the passing there\n",
367
+ "Had worn them really about the same,\n",
368
+ "And both that morning equally lay\n",
369
+ "In leaves no step had trodden black.\n",
370
+ "Oh, I kept the first for another day!\n",
371
+ "Yet knowing how way leads on to way,\n",
372
+ "I doubted if I should ever come back.\n",
373
+ "I shall be telling this with a sigh\n",
374
+ "Somewhere ages and ages hence:\n",
375
+ "Two roads diverged in a wood, and I—\n",
376
+ "I took the one less traveled by,\n",
377
+ "And that has made all the difference.\n",
378
+ "—-Robert Frost—-\n",
379
+ "Education Place: http://www.eduplace.com .\n"
380
+ ]
381
+ }
382
+ ],
383
+ "source": [
384
+ "# Use different function to create the prompt\n",
385
+ "full_prompt = get_tutoring_prompt(context, assessment_request = assessment_type,\n",
386
+ " learning_objectives = learning_objs)\n",
387
+ "\n",
388
+ "# Show the prompt as a string\n",
389
+ "print(full_prompt.to_string())"
390
+ ]
391
+ },
392
+ {
393
+ "cell_type": "markdown",
394
+ "metadata": {},
395
+ "source": [
396
+ "Alternately, you can define your own prompt, which you'll as appropriate. To modify the kind of assessment you'll be asking for, change `assessment_request`. An example of how to add more context to the model is shown below as well."
397
+ ]
398
+ },
399
+ {
400
+ "cell_type": "code",
401
+ "execution_count": null,
402
+ "metadata": {},
403
+ "outputs": [
404
+ {
405
+ "data": {
406
+ "text/plain": [
407
+ "\"Question: Who is the narrator in the poem and what is the underlying message conveyed?\\n\\nHint: Pay attention to the pronouns used throughout the poem to determine the narrator's identity. Additionally, think about the choices made by the narrator and the impact those choices have on their life. The underlying message is related to the consequences of these choices.\\n\\nTake your time to reflect on the text and provide your answer when you're ready.\""
408
+ ]
409
+ },
410
+ "execution_count": null,
411
+ "metadata": {},
412
+ "output_type": "execute_result"
413
+ }
414
+ ],
415
+ "source": [
416
+ "# Use your own texts\n",
417
+ "custom_request = (\"Ask me a short answer question about the provided text. The questions you ask should allow\"\n",
418
+ " \" me to demonstrate my creativity, capacity for out-of-the-box thinking, insights, and deeper meaning \"\n",
419
+ " \" of the text.\")\n",
420
+ "additional_context = (\"This is a text written by Robert Frost, a famous American poet. The text is widely studied in K-12 literature\"\n",
421
+ " \" education courses, and should be read with an eye towards the philosophical and human themes of the text.\")\n",
422
+ "\n",
423
+ "# Concatenate context\n",
424
+ "informed_context = context + \"\\n Additional information about the text is: \" + additional_context \n",
425
+ "\n",
426
+ "# Use custom_request defined as the assessment request\n",
427
+ "get_tutoring_answer(informed_context, mdl, assessment_request = custom_request,\n",
428
+ " learning_objectives = learning_objs)"
429
+ ]
430
+ },
431
+ {
432
+ "cell_type": "markdown",
433
+ "metadata": {},
434
+ "source": [
435
+ "## Types of Questions and Prompts\n",
436
+ "\n",
437
+ "Below is a comprehensive list of question types and prompt templates designed by our team. There are also example code blocks, where you can see how the model performed with the example and try it for yourself using the prompt template."
438
+ ]
439
+ },
440
+ {
441
+ "cell_type": "markdown",
442
+ "metadata": {},
443
+ "source": [
444
+ "### Multiple Choice\n",
445
+ "\n",
446
+ "Prompt: The following text should be used as the basis for the instructions which follow: {context}. Please design a {number of questions} question quiz about {name or reference to context} which reflects the learning objectives: {list of learning objectives}. The questions should be multiple choice. Provide one question at a time, and wait for my response before providing me with feedback. Again, while the quiz may ask for multiple questions, you should only provide ONE question in you initial response. Do not include the answer in your response. If I get an answer wrong, provide me with an explanation of why it was incorrect,and then give me additional chances to respond until I get the correct choice. Explain why the correct choice is right."
447
+ ]
448
+ },
449
+ {
450
+ "cell_type": "code",
451
+ "execution_count": null,
452
+ "metadata": {},
453
+ "outputs": [
454
+ {
455
+ "name": "stdout",
456
+ "output_type": "stream",
457
+ "text": [
458
+ "Question 1: Who is the narrator of the poem?\n",
459
+ "\n",
460
+ "A) Robert Frost\n",
461
+ "B) The traveler \n",
462
+ "C) The undergrowth \n",
463
+ "D) The wood\n",
464
+ "\n",
465
+ "Please provide your answer.\n"
466
+ ]
467
+ }
468
+ ],
469
+ "source": [
470
+ "# Multiple choice code example\n",
471
+ "tutor_q = get_tutoring_answer(context, mdl, assessment_request = ssp.SELF_STUDY_DEFAULTS['mc'],\n",
472
+ " learning_objectives = learning_objs)\n",
473
+ "print(tutor_q)"
474
+ ]
475
+ },
476
+ {
477
+ "cell_type": "markdown",
478
+ "metadata": {},
479
+ "source": [
480
+ "### Short Answer\n",
481
+ "\n",
482
+ "Prompt: Please design a {number of questions} question quiz about {context} which reflects the learning objectives: {list of learning objectives}. The questions should be short answer. Expect the correct answers to be {anticipated length} long. Provide one question at a time, and wait for my response before providing me with feedback. Again, while the quiz may ask for multiple questions, you should only provide ONE question in you initial response. Do not include the answer in your response. If I get an answer wrong, provide me with an explanation of why it was incorrect,and then give me additional chances to respond until I get the correct choice. Explain why the correct choice is right."
483
+ ]
484
+ },
485
+ {
486
+ "cell_type": "code",
487
+ "execution_count": null,
488
+ "metadata": {},
489
+ "outputs": [
490
+ {
491
+ "name": "stdout",
492
+ "output_type": "stream",
493
+ "text": [
494
+ "Question 1: Who is the narrator of the poem and what is the setting?\n",
495
+ "\n",
496
+ "Remember to answer the question by identifying the narrator of the poem and describing the setting in which the events take place.\n"
497
+ ]
498
+ }
499
+ ],
500
+ "source": [
501
+ "# Short answer code example\n",
502
+ "tutor_q = get_tutoring_answer(context, mdl, assessment_request = ssp.SELF_STUDY_DEFAULTS['short_answer'],\n",
503
+ " learning_objectives = learning_objs)\n",
504
+ "print(tutor_q)"
505
+ ]
506
+ },
507
+ {
508
+ "cell_type": "markdown",
509
+ "metadata": {},
510
+ "source": [
511
+ "### Fill-in-the-blank\n",
512
+ "\n",
513
+ "Prompt: Create a {number of questions} question fill in the blank quiz refrencing {context}. The quiz should reflect the learning objectives: {learning objectives}. The \"blank\" part of the question should appear as \"________\". The answers should reflect what word(s) should go in the blank an accurate statement.\n",
514
+ "\n",
515
+ "An example is the follow: \"The author of the book is \"________.\"\n",
516
+ "\n",
517
+ "The question should be a statement. Provide one question at a time, and wait for my response before providing me with feedback. Again, while the quiz may ask for multiple questions, you should only provide ONE question in you initial response. Do not include the answer in your response. If I get an answer wrong, provide me with an explanation of why it was incorrect,and then give me additional chances to respond until I get the correct choice. Explain why the correct choice is right."
518
+ ]
519
+ },
520
+ {
521
+ "cell_type": "code",
522
+ "execution_count": null,
523
+ "metadata": {},
524
+ "outputs": [
525
+ {
526
+ "name": "stdout",
527
+ "output_type": "stream",
528
+ "text": [
529
+ "Question 1: The poem \"The Road Not Taken\" was written by ________.\n",
530
+ "\n",
531
+ "Question 2: What is the color of the wood where the two roads diverged? \n",
532
+ "\n",
533
+ "Question 3: What is the reason the narrator gives for choosing the second road?\n",
534
+ "\n",
535
+ "Question 4: What does the narrator say about the wear of both roads?\n",
536
+ "\n",
537
+ "Question 5: According to the poem, what has made all the difference in the narrator's life?\n",
538
+ "\n",
539
+ "Remember to wait for the student's response before providing feedback.\n"
540
+ ]
541
+ }
542
+ ],
543
+ "source": [
544
+ "# Fill in the blank code example\n",
545
+ "tutor_q = get_tutoring_answer(context, mdl, assessment_request = ssp.SELF_STUDY_DEFAULTS['fill_blank'],\n",
546
+ " learning_objectives = learning_objs)\n",
547
+ "print(tutor_q)"
548
+ ]
549
+ },
550
+ {
551
+ "cell_type": "markdown",
552
+ "metadata": {},
553
+ "source": [
554
+ "### Sequencing\n",
555
+ "\n",
556
+ "Prompt: Please develop a {number of questions} question questionnaire that will ask me to recall the steps involved in the following learning objectives in regard to {context}: {learning objectives}. Provide one question at a time, and wait for my response before providing me with feedback. Again, while the quiz may ask for multiple questions, you should only provide ONE question in you initial response. Do not include the answer in your response. If I get an answer wrong, provide me with an explanation of why it was incorrect, and then give me additional chances to respond until I get the correct choice. After I respond, explain their sequence to me."
557
+ ]
558
+ },
559
+ {
560
+ "cell_type": "code",
561
+ "execution_count": null,
562
+ "metadata": {},
563
+ "outputs": [
564
+ {
565
+ "name": "stdout",
566
+ "output_type": "stream",
567
+ "text": [
568
+ "Question 1: Who is the narrator of the poem?\n",
569
+ "\n",
570
+ "Question 2: What is the setting of the poem?\n",
571
+ "\n",
572
+ "Question 3: What is the underlying message of the poem?\n",
573
+ "\n",
574
+ "Question 4: What literary device is used in the line \"Two roads diverged in a yellow wood\"?\n",
575
+ "\n",
576
+ "Question 5: What is the purpose of using the literary device in question 4?\n",
577
+ "\n",
578
+ "Please answer question 1 first.\n"
579
+ ]
580
+ }
581
+ ],
582
+ "source": [
583
+ "# Sequence example\n",
584
+ "tutor_q = get_tutoring_answer(context, mdl, assessment_request = ssp.SELF_STUDY_DEFAULTS['sequencing'],\n",
585
+ " learning_objectives = learning_objs)\n",
586
+ "\n",
587
+ "print(tutor_q)"
588
+ ]
589
+ },
590
+ {
591
+ "cell_type": "markdown",
592
+ "metadata": {},
593
+ "source": [
594
+ "### Relationships/drawing connections\n",
595
+ "\n",
596
+ "Prompt: Please design a {number of questions} question quiz that asks me to explain the relationships that exist within the following learning objectives, referencing {context}: {learning objectives}. Provide one question at a time, and wait for my response before providing me with feedback. Again, while the quiz may ask for multiple questions, you should only provide ONE question in you initial response. Do not include the answer in your response. If I get an answer wrong, provide me with an explanation of why it was incorrect,and then give me additional chances to respond until I get the correct choice. Explain why the correct choice is right."
597
+ ]
598
+ },
599
+ {
600
+ "cell_type": "code",
601
+ "execution_count": null,
602
+ "metadata": {},
603
+ "outputs": [
604
+ {
605
+ "name": "stdout",
606
+ "output_type": "stream",
607
+ "text": [
608
+ "Question 1: Who is the narrator of the poem?\n",
609
+ "\n",
610
+ "Question 2: What is the setting of the poem?\n",
611
+ "\n",
612
+ "Question 3: What is the underlying message of the poem?\n",
613
+ "\n",
614
+ "Question 4: What literary device is used when the narrator says, \"Two roads diverged in a yellow wood\"?\n",
615
+ "\n",
616
+ "Question 5: What literary device is used when the narrator says, \"I took the one less traveled by, And that has made all the difference\"?\n"
617
+ ]
618
+ }
619
+ ],
620
+ "source": [
621
+ "# Relationships example\n",
622
+ "tutor_q = get_tutoring_answer(context, mdl, assessment_request = ssp.SELF_STUDY_DEFAULTS['relationships'],\n",
623
+ " learning_objectives = learning_objs)\n",
624
+ "\n",
625
+ "print(tutor_q)"
626
+ ]
627
+ },
628
+ {
629
+ "cell_type": "markdown",
630
+ "metadata": {},
631
+ "source": [
632
+ "### Concepts and Definitions\n",
633
+ "\n",
634
+ "Prompt: Design a {number of questions} question quiz that asks me about definitions related to the following learning objectives: {learning objectives} - based on {context}\".\n",
635
+ "Provide one question at a time, and wait for my response before providing me with feedback. Again, while the quiz may ask for multiple questions, you should only provide ONE question in you initial response. Do not include the answer in your response. If I get an answer wrong, provide me with an explanation of why it was incorrect,and then give me additional chances to respond until I get the correct choice. Explain why the correct choice is right.\n"
636
+ ]
637
+ },
638
+ {
639
+ "cell_type": "code",
640
+ "execution_count": null,
641
+ "metadata": {},
642
+ "outputs": [
643
+ {
644
+ "name": "stdout",
645
+ "output_type": "stream",
646
+ "text": [
647
+ "Question 1: Who is the narrator of the poem? \n",
648
+ "\n",
649
+ "Remember, the narrator is the person who is speaking or telling the story.\n"
650
+ ]
651
+ }
652
+ ],
653
+ "source": [
654
+ "# Concepts and definitions example\n",
655
+ "tutor_q = get_tutoring_answer(context, mdl, assessment_request = ssp.SELF_STUDY_DEFAULTS['concepts'],\n",
656
+ " learning_objectives = learning_objs)\n",
657
+ "\n",
658
+ "print(tutor_q)"
659
+ ]
660
+ },
661
+ {
662
+ "cell_type": "markdown",
663
+ "metadata": {},
664
+ "source": [
665
+ "### Real Word Examples\n",
666
+ "\n",
667
+ "Prompt: Demonstrate how {context} can be applied to solve a real-world problem related to the following learning objectives: {learning objectives}. Ask me questions regarding this theory/concept.\n",
668
+ "\n",
669
+ "Provide one question at a time, and wait for my response before providing me with feedback. Again, while the quiz may ask for multiple questions, you should only provide ONE question in you initial response. Do not include the answer in your response. If I get an answer wrong, provide me with an explanation of why it was incorrect,and then give me additional chances to respond until I get the correct choice. Explain why the correct choice is right."
670
+ ]
671
+ },
672
+ {
673
+ "cell_type": "code",
674
+ "execution_count": null,
675
+ "metadata": {},
676
+ "outputs": [
677
+ {
678
+ "name": "stdout",
679
+ "output_type": "stream",
680
+ "text": [
681
+ "Question 1: Who is the narrator of the poem?\n",
682
+ "\n",
683
+ "Question 2: What is the setting of the poem?\n",
684
+ "\n",
685
+ "Question 3: What is the underlying message of the poem?\n",
686
+ "\n",
687
+ "Remember to provide your answer to one question at a time.\n"
688
+ ]
689
+ }
690
+ ],
691
+ "source": [
692
+ "# Real word example\n",
693
+ "tutor_q = get_tutoring_answer(context, mdl, assessment_request = ssp.SELF_STUDY_DEFAULTS['real_world_example'],\n",
694
+ " learning_objectives = learning_objs)\n",
695
+ "\n",
696
+ "print(tutor_q)"
697
+ ]
698
+ },
699
+ {
700
+ "cell_type": "markdown",
701
+ "metadata": {},
702
+ "source": [
703
+ "### Randomized Question Types\n",
704
+ "\n",
705
+ "Prompt: Please generate a high-quality assessment consisting of {number of questions} varying questions, each of different types (open-ended, multiple choice, etc.), to determine if I achieved the following learning objectives in regards to {context}: {learning objectives}.\n",
706
+ "\n",
707
+ "Provide one question at a time, and wait for my response before providing me with feedback. Again, while the quiz may ask for multiple questions, you should only provide ONE question in you initial response. Do not include the answer in your response. If I get an answer wrong, provide me with an explanation of why it was incorrect,and then give me additional chances to respond until I get the correct choice. Explain why the correct choice is right."
708
+ ]
709
+ },
710
+ {
711
+ "cell_type": "code",
712
+ "execution_count": null,
713
+ "metadata": {},
714
+ "outputs": [
715
+ {
716
+ "name": "stdout",
717
+ "output_type": "stream",
718
+ "text": [
719
+ "Question 1 (Open-ended): Who is the narrator of the poem and what is the setting?\n",
720
+ "\n",
721
+ "Question 2 (Multiple choice): Which literary device is used in the line \"And sorry I could not travel both\"?\n",
722
+ "\n",
723
+ "a) Simile\n",
724
+ "b) Metaphor\n",
725
+ "c) Alliteration\n",
726
+ "d) Personification\n",
727
+ "\n",
728
+ "Question 3 (Short answer): Describe the underlying message of the poem in one sentence.\n",
729
+ "\n",
730
+ "Question 4 (Analogies): Complete the analogy: \"The two roads diverged in a yellow wood\" is to the physical setting as \"I took the one less traveled by\" is to ___________.\n",
731
+ "\n",
732
+ "Question 5 (Open-ended): Identify and explain one additional literary device used in the poem and its purpose.\n",
733
+ "\n",
734
+ "Please choose one question from above for me to provide a detailed evaluation.\n"
735
+ ]
736
+ }
737
+ ],
738
+ "source": [
739
+ "# Randomized question types\n",
740
+ "tutor_q = get_tutoring_answer(context, mdl, assessment_request = ssp.SELF_STUDY_DEFAULTS['randomized_questions'],\n",
741
+ " learning_objectives = learning_objs)\n",
742
+ "\n",
743
+ "print(tutor_q)"
744
+ ]
745
+ },
746
+ {
747
+ "cell_type": "markdown",
748
+ "metadata": {},
749
+ "source": [
750
+ "### Quantiative evaluation the correctness of a student's answer\n",
751
+ "\n",
752
+ "Prompt: (A continuation of the previous chat) Please generate the main points of the student’s answer to the previous question, and evaluate on a scale of 1 to 5 how comprehensive the student’s answer was in relation to the learning objectives, and explain why he or she received this rating, including what was missed in his or her answer if the student’s answer wasn’t complete.\n"
753
+ ]
754
+ },
755
+ {
756
+ "cell_type": "code",
757
+ "execution_count": null,
758
+ "metadata": {},
759
+ "outputs": [
760
+ {
761
+ "name": "stdout",
762
+ "output_type": "stream",
763
+ "text": [
764
+ "Based on the provided text, the student's answer to the previous question was not provided. Therefore, I cannot generate the main points of the student's answer or evaluate its comprehensiveness in relation to the learning objectives. Please provide the student's answer to the previous question so that I can assist you further.\n"
765
+ ]
766
+ }
767
+ ],
768
+ "source": [
769
+ "# qualitative evaluation\n",
770
+ "qualitative_query = \"\"\" Please generate the main points of the student’s answer to the previous question,\n",
771
+ " and evaluate on a scale of 1 to 5 how comprehensive the student’s answer was in relation to the learning objectives,\n",
772
+ " and explain why he or she received this rating, including what was missed in his or her answer if the student’s answer wasn’t complete.\"\"\"\n",
773
+ "\n",
774
+ "# Note that this uses the previous result and query in the context\n",
775
+ "last_answer = ''\n",
776
+ "\n",
777
+ "# Get result with formatting to emphasize changes in parameter inputs\n",
778
+ "result = get_tutoring_answer(last_answer + context,\n",
779
+ " mdl,\n",
780
+ " assessment_request = qualitative_query,\n",
781
+ " learning_objectives = learning_objs)\n",
782
+ "\n",
783
+ "print(result)"
784
+ ]
785
+ }
786
+ ],
787
+ "metadata": {
788
+ "kernelspec": {
789
+ "display_name": "python3",
790
+ "language": "python",
791
+ "name": "python3"
792
+ }
793
+ },
794
+ "nbformat": 4,
795
+ "nbformat_minor": 0
796
+ }
lo-achievement/prompt_with_vector_store.ipynb ADDED
@@ -0,0 +1,637 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "<a href=\"https://colab.research.google.com/github/vanderbilt-data-science/lo-achievement/blob/main/prompt_with_vector_store.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "markdown",
12
+ "metadata": {},
13
+ "source": [
14
+ "# LLMs for Self-Study\n",
15
+ "> A prompt and code template for better understanding texts\n",
16
+ "\n",
17
+ "This notebook provides a guide for using LLMs for self-study programmatically. A number of prompt templates are provided to assist with generating great assessments for self-study, and code is additionally provided for fast usage. This notebook is best leveraged for a set of documents (text or PDF preferred) **to be uploaded** for interaction with the model.\n",
18
+ "\n",
19
+ "This version of the notebook is best suited for those who prefer to use files from their local drive as context rather than copy and pasting directly into the notebook to be used as context for the model. If you prefer to copy and paste text, you should direct yourself to the [prompt_with_context](https://colab.research.google.com/github/vanderbilt-data-science/lo-achievement/blob/main/prompt_with_context.ipynb) notebook."
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "raw",
24
+ "metadata": {},
25
+ "source": [
26
+ "---\n",
27
+ "skip_exec: true\n",
28
+ "---"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "execution_count": null,
34
+ "metadata": {},
35
+ "outputs": [],
36
+ "source": [
37
+ "# run this code if you're using Google Colab or don't have these packages installed in your computing environment\n",
38
+ "! pip install pip install git+https://<token>@github.com/vanderbilt-data-science/lo-achievement.git"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": null,
44
+ "metadata": {},
45
+ "outputs": [],
46
+ "source": [
47
+ "#libraries for user setup code\n",
48
+ "from getpass import getpass\n",
49
+ "from logging import raiseExceptions\n",
50
+ "\n",
51
+ "#self import code\n",
52
+ "from ai_classroom_suite.PromptInteractionBase import *\n",
53
+ "from ai_classroom_suite.IOHelperUtilities import *\n",
54
+ "from ai_classroom_suite.SelfStudyPrompts import *\n",
55
+ "from ai_classroom_suite.MediaVectorStores import *"
56
+ ]
57
+ },
58
+ {
59
+ "cell_type": "markdown",
60
+ "metadata": {},
61
+ "source": [
62
+ "# User Settings\n",
63
+ "In this section, you'll set your OpenAI API Key (for use with the OpenAI model), configure your environment/files for upload, and upload those files."
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": null,
69
+ "metadata": {},
70
+ "outputs": [],
71
+ "source": [
72
+ "# Run this cell and enter your OpenAI API key when prompted\n",
73
+ "set_openai_key()"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": null,
79
+ "metadata": {},
80
+ "outputs": [],
81
+ "source": [
82
+ "# Create model\n",
83
+ "mdl_name = 'gpt-3.5-turbo-16k'\n",
84
+ "chat_llm = create_model(mdl_name)"
85
+ ]
86
+ },
87
+ {
88
+ "cell_type": "markdown",
89
+ "metadata": {},
90
+ "source": [
91
+ "## Define Your Documents Source\n",
92
+ "You may upload your files directly from your computer, or you may choose to do so via your Google Drive. Below, you will find instructions for both methods.\n",
93
+ "\n",
94
+ "For either model, begin by setting the `upload_setting` variable to:\n",
95
+ "* `'Local Drive'` - if you have files that are on your own computer (locally), or\n",
96
+ "* `'Google Drive'` - if you have files that are stored on Google Drive\n",
97
+ "\n",
98
+ "e.g.,\n",
99
+ "`upload_setting='Google Drive'`.\n",
100
+ "Don't forget the quotes around your selection!"
101
+ ]
102
+ },
103
+ {
104
+ "cell_type": "code",
105
+ "execution_count": null,
106
+ "metadata": {},
107
+ "outputs": [],
108
+ "source": [
109
+ "## Settings for upload: via local drive or Google Drive\n",
110
+ "### Please input either \"Google Drive\" or \"Local Drive\" into the empty string\n",
111
+ "\n",
112
+ "#upload_setting = 'Google Drive'\n",
113
+ "upload_setting = 'Local Drive'"
114
+ ]
115
+ },
116
+ {
117
+ "cell_type": "markdown",
118
+ "metadata": {},
119
+ "source": [
120
+ "<p style='color:green'><strong>Before Continuing</strong> - Make sure you have input your choice of upload into the `upload_setting`` variable above (Options: \"Local Drive\" or \"Google Drive\") as described in the above instructions.</p>"
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "markdown",
125
+ "metadata": {},
126
+ "source": [
127
+ "## Upload your Files\n",
128
+ "Now, you'll upload your files. When you run the below code cell, you'll be able to follow the instructions for local or Google Drive upload described here. If you would like to use our example document (Robert Frost's \"The Road Not Taken\", you can download the file from [this link](https://drive.google.com/drive/folders/1wpEoGACUqyNRYa4zBZeNkqcLJrGQbA53?usp=sharing) and upload via the instructions above.\n",
129
+ "\n",
130
+ "**If you selected **\"Local Drive\"** :**\n",
131
+ "> If you selected Local Drive, you'll need to start by selecting your local files. Run the code cell below. Once the icon appears, click the \"Choose File\". This will direct you to your computer's local drive. Select the file you would like to upload as context. The files will appear in the right sidebar. Then follow the rest of the steps in the \"Uploading Your files (Local Drive and Google Drive)\" below.\n",
132
+ "\n",
133
+ "**If you selected **\"Google Drive\"**: **\n",
134
+ "> If you selected Google Drive, you'll need to start by allowing access to your Google Drive. Run the code cell below. You will be redirected to a window where you will allow access to your Google Drive by logging into your Google Account. Your Drive will appear as a folder in the left side panel. Navigate through your Google Drive until you've found the file that you'd like to upload.\n",
135
+ "\n",
136
+ "Your files are now accessible to the code."
137
+ ]
138
+ },
139
+ {
140
+ "cell_type": "code",
141
+ "execution_count": null,
142
+ "metadata": {},
143
+ "outputs": [
144
+ {
145
+ "data": {
146
+ "application/vnd.jupyter.widget-view+json": {
147
+ "model_id": "e10a33b291a14f8089a1dea89f872998",
148
+ "version_major": 2,
149
+ "version_minor": 0
150
+ },
151
+ "text/plain": [
152
+ "FileChooser(path='/workspaces/lo-achievement', filename='', title='Use the following file chooser to add each …"
153
+ ]
154
+ },
155
+ "metadata": {},
156
+ "output_type": "display_data"
157
+ },
158
+ {
159
+ "data": {
160
+ "application/vnd.jupyter.widget-view+json": {
161
+ "model_id": "4649cff2ba0942aa9c5e073be85f40fb",
162
+ "version_major": 2,
163
+ "version_minor": 0
164
+ },
165
+ "text/plain": [
166
+ "Output()"
167
+ ]
168
+ },
169
+ "metadata": {},
170
+ "output_type": "display_data"
171
+ }
172
+ ],
173
+ "source": [
174
+ "# Run this cell then following the instructions to upload your file\n",
175
+ "selected_files = setup_drives(upload_setting)"
176
+ ]
177
+ },
178
+ {
179
+ "cell_type": "code",
180
+ "execution_count": null,
181
+ "metadata": {},
182
+ "outputs": [
183
+ {
184
+ "data": {
185
+ "text/plain": [
186
+ "['/workspaces/lo-achievement/roadnottaken.txt']"
187
+ ]
188
+ },
189
+ "execution_count": null,
190
+ "metadata": {},
191
+ "output_type": "execute_result"
192
+ }
193
+ ],
194
+ "source": [
195
+ "selected_files"
196
+ ]
197
+ },
198
+ {
199
+ "cell_type": "markdown",
200
+ "metadata": {},
201
+ "source": [
202
+ "# Resource and Personal Tutor Creation\n",
203
+ "Congratulations! You've nearly finished with the setup! From here, you can now run this section of cells using the arrow to the left to set up your vector store and create your model."
204
+ ]
205
+ },
206
+ {
207
+ "cell_type": "markdown",
208
+ "metadata": {},
209
+ "source": [
210
+ "## Create a vector store with your document\n",
211
+ "\n",
212
+ "With the file path, you can now create a vector store using the document that you uploaded. We expose this creation in case you want to modify the kind of vector store that you're creating. Run the cell below to create the default provided vector store."
213
+ ]
214
+ },
215
+ {
216
+ "cell_type": "code",
217
+ "execution_count": null,
218
+ "metadata": {},
219
+ "outputs": [],
220
+ "source": [
221
+ "# Create vector store\n",
222
+ "doc_segments = get_document_segments(selected_files, data_type = 'files')\n",
223
+ "chroma_db, vs_retriever = create_local_vector_store(doc_segments, search_kwargs={\"k\": 1})"
224
+ ]
225
+ },
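+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If you would like the retriever to return more than one matching segment per query, one possible variation is sketched in the next cell. It reuses the same helper as above with an example value for `k`, and it is left commented out so the default single-segment retriever created above stays in effect unless you opt in."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Optional variation (sketch): uncomment to rebuild the retriever so it returns the top 3 segments per query\n",
+ "# chroma_db, vs_retriever = create_local_vector_store(doc_segments, search_kwargs={\"k\": 3})"
+ ]
+ },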
226
+ {
227
+ "cell_type": "markdown",
228
+ "metadata": {},
229
+ "source": [
230
+ "## Create the model which will do the vector store lookup and tutoring"
231
+ ]
232
+ },
233
+ {
234
+ "cell_type": "code",
235
+ "execution_count": null,
236
+ "metadata": {},
237
+ "outputs": [],
238
+ "source": [
239
+ "# Create retrieval chain\n",
240
+ "qa_chain = create_tutor_mdl_chain(kind=\"retrieval_qa\", retriever = vs_retriever)"
241
+ ]
242
+ },
243
+ {
244
+ "cell_type": "markdown",
245
+ "metadata": {},
246
+ "source": [
247
+ "# A guide to prompting for self-study\n",
248
+ "In this section, we provide a number of different approaches for using AI to help you assess and explain the knowledge of your document. Start by interacting with the model and then try out the rest of the prompts!"
249
+ ]
250
+ },
251
+ {
252
+ "cell_type": "markdown",
253
+ "metadata": {},
254
+ "source": [
255
+ "## Brief overview of tutoring code options\n",
256
+ "\n",
257
+ "Now that your vector store is created, you can begin interacting with the model! You will interact with the model with a vector store using the `get_tutoring_answer` function below, and details are provided regarding the functionality below.\n",
258
+ "\n",
259
+ "Consider the multiple choice code snippet:\n",
260
+ "```{python}\n",
261
+ "tutor_q = get_tutoring_answer(context = '',\n",
262
+ " qa_chain,\n",
263
+ " assessment_request = SELF_STUDY_DEFAULTS['mc'],\n",
264
+ " learning_objectives = learning_objs,\n",
265
+ " input_kwargs = {'question':topic})\n",
266
+ "```\n",
267
+ "\n",
268
+ "This is how we're able to interact with the model for tutoring when using vector stores. The parameters are as follows:\n",
269
+ "\n",
270
+ "* `context` will be an empty string or you can also set it to `None`. This is because this field is automatically populated using the vector store retreiver.\n",
271
+ "* `qa_chain` is the model that you're using - we created this model chain a few cells above. \n",
272
+ "* `assessment_request` is your way of telling the model what kind of assessment you want. In the example above, we use some defaults provided for multiple choice. You can also insert your own text here. To learn more about these defaults, see the `prompt_with_context.ipynb` in the CLAS repo.\n",
273
+ "* `learning_objectives` are the learning objectives that you want to assess in a single paragraph string. You can set this to '' if you don't want to define any learning objectives. If you don't provide one, the model will use the default learning objectives.\n",
274
+ "* `input_kwargs` are additional inputs that we can define in the prompts. Above, you see that the keyword `question` is defined. `question` is the text used to retrieve relevant texts from the vector store. Above, we define a custom topic. If you were to omit this parameter, the model would use `assessment_request` as the text to retrieve relevant documents from the vector store. See the examples below for both scenarios.\n",
275
+ "\n"
276
+ ]
277
+ },
278
+ {
279
+ "cell_type": "markdown",
280
+ "metadata": {},
281
+ "source": [
282
+ "## Sample topics and learning objectives\n",
283
+ "\n",
284
+ "Below, we define a topic (used to retrieve documents from the vector store if provided) and learning objectives which will be used in the following examples. You can change these as needed for your purpose."
285
+ ]
286
+ },
287
+ {
288
+ "cell_type": "code",
289
+ "execution_count": null,
290
+ "metadata": {},
291
+ "outputs": [],
292
+ "source": [
293
+ "# Code topic\n",
294
+ "topic = 'The full text of the poem \"The Road Not Taken\" by Robert Frost'\n",
295
+ "\n",
296
+ "# set learning objectives if desired\n",
297
+ "learning_objs = (\"\"\"1. Identify the key elements of the work: important takeaways and underlying message.\n",
298
+ " 2. Understand the literary devices used in prompting and in literature and their purpose.\"\"\")"
299
+ ]
300
+ },
301
+ {
302
+ "cell_type": "markdown",
303
+ "metadata": {},
304
+ "source": [
305
+ "## Types of Questions and Prompts\n",
306
+ "\n",
307
+ "Below is a comprehensive list of question types and prompt templates designed by our team. There are also example code blocks, where you can see how the model performed with the example and try it for yourself using the prompt template."
308
+ ]
309
+ },
310
+ {
311
+ "cell_type": "markdown",
312
+ "metadata": {},
313
+ "source": [
314
+ "### Multiple Choice\n",
315
+ "\n",
316
+ "Prompt: The following text should be used as the basis for the instructions which follow: {context}. Please design a 5 question quiz about {name or reference to context} which reflects the learning objectives: {list of learning objectives}. The questions should be multiple choice. If I get an answer wrong, provide me with an explanation of why it was incorrect, and then give me additional chances to respond until I get the correct choice. Explain why the correct choice is right."
317
+ ]
318
+ },
319
+ {
320
+ "cell_type": "code",
321
+ "execution_count": null,
322
+ "metadata": {},
323
+ "outputs": [
324
+ {
325
+ "name": "stdout",
326
+ "output_type": "stream",
327
+ "text": [
328
+ "Question 1: What is the underlying message of the excerpt?\n",
329
+ "\n",
330
+ "A) The speaker regrets not being able to travel both roads.\n",
331
+ "B) The speaker believes that taking the less traveled road has made a significant impact on their life.\n",
332
+ "C) The speaker is unsure about which road to choose.\n",
333
+ "D) The speaker is fascinated by the beauty of the yellow wood.\n",
334
+ "\n",
335
+ "Please select one of the options (A, B, C, or D) and provide your answer.\n"
336
+ ]
337
+ }
338
+ ],
339
+ "source": [
340
+ "# Multiple choice code example\n",
341
+ "tutor_q = get_tutoring_answer('', qa_chain, assessment_request = SELF_STUDY_DEFAULTS['mc'],\n",
342
+ " learning_objectives = learning_objs, input_kwargs = {'question':topic})\n",
343
+ "\n",
344
+ "print(tutor_q)"
345
+ ]
346
+ },
347
+ {
348
+ "cell_type": "markdown",
349
+ "metadata": {},
350
+ "source": [
351
+ "### Short Answer\n",
352
+ "\n",
353
+ "Prompt: Please design a 5-question quiz about {context} which reflects the learning objectives: {list of learning objectives}. The questions should be short answer. Expect the correct answers to be {anticipated length} long. If I get any part of the answer wrong, provide me with an explanation of why it was incorrect, and then give me additional chances to respond until I get the correct choice."
354
+ ]
355
+ },
356
+ {
357
+ "cell_type": "code",
358
+ "execution_count": null,
359
+ "metadata": {},
360
+ "outputs": [
361
+ {
362
+ "name": "stdout",
363
+ "output_type": "stream",
364
+ "text": [
365
+ "Question 1: What is the underlying message of the poem?\n",
366
+ "\n",
367
+ "Remember to provide your answer in a few sentences.\n"
368
+ ]
369
+ }
370
+ ],
371
+ "source": [
372
+ "# Short answer code example\n",
373
+ "tutor_q = get_tutoring_answer(None, qa_chain, assessment_request = SELF_STUDY_DEFAULTS['short_answer'],\n",
374
+ " learning_objectives = learning_objs, input_kwargs = {'question':topic})\n",
375
+ "\n",
376
+ "print(tutor_q)"
377
+ ]
378
+ },
379
+ {
380
+ "cell_type": "markdown",
381
+ "metadata": {},
382
+ "source": [
383
+ "### Fill-in-the-blank\n",
384
+ "\n",
385
+ "Prompt: Create a 5 question fill in the blank quiz refrencing {context}. The quiz should reflect the learning objectives: {learning objectives}. Please prompt me one question at a time and proceed when I answer correctly. If I answer incorrectly, please explain why my answer is incorrect.\n",
386
+ "\n",
387
+ ":::{.callout-info}\n",
388
+ "In the example below, we omit the `input_kwargs` parameter. This means we'll use the text from `assessment_request` as the question topic.\n",
389
+ ":::"
390
+ ]
391
+ },
392
+ {
393
+ "cell_type": "code",
394
+ "execution_count": null,
395
+ "metadata": {},
396
+ "outputs": [
397
+ {
398
+ "name": "stdout",
399
+ "output_type": "stream",
400
+ "text": [
401
+ "Question: The speaker in the poem \"The Road Not Taken\" is faced with a choice between _______ roads.\n",
402
+ "\n",
403
+ "Please provide your answer.\n"
404
+ ]
405
+ }
406
+ ],
407
+ "source": [
408
+ "# Fill in the blank code example\n",
409
+ "tutor_q = get_tutoring_answer(None, qa_chain, assessment_request = SELF_STUDY_DEFAULTS['fill_blank'],\n",
410
+ " learning_objectives = learning_objs)\n",
411
+ "\n",
412
+ "print(tutor_q)"
413
+ ]
414
+ },
415
+ {
416
+ "cell_type": "markdown",
417
+ "metadata": {},
418
+ "source": [
419
+ "### Sequencing\n",
420
+ "\n",
421
+ "Prompt: Please develop a 5 question questionnaire that will ask me to recall the steps involved in the following learning objectives in regard to {context}: {learning objectives}. After I respond, explain their sequence to me."
422
+ ]
423
+ },
424
+ {
425
+ "cell_type": "code",
426
+ "execution_count": null,
427
+ "metadata": {},
428
+ "outputs": [
429
+ {
430
+ "name": "stdout",
431
+ "output_type": "stream",
432
+ "text": [
433
+ "Question 1: What is the underlying message or theme of the provided text?\n",
434
+ "\n",
435
+ "(Note: Please provide your response and I will evaluate it.)\n"
436
+ ]
437
+ }
438
+ ],
439
+ "source": [
440
+ "# Sequence example\n",
441
+ "tutor_q = get_tutoring_answer(None, qa_chain, assessment_request = SELF_STUDY_DEFAULTS['sequencing'],\n",
442
+ " learning_objectives = learning_objs)\n",
443
+ "\n",
444
+ "print(tutor_q)"
445
+ ]
446
+ },
447
+ {
448
+ "cell_type": "markdown",
449
+ "metadata": {},
450
+ "source": [
451
+ "### Relationships/drawing connections\n",
452
+ "\n",
453
+ "Prompt: Please design a 5 question quiz that asks me to explain the relationships that exist within the following learning objectives, referencing {context}: {learning objectives}."
454
+ ]
455
+ },
456
+ {
457
+ "cell_type": "code",
458
+ "execution_count": null,
459
+ "metadata": {},
460
+ "outputs": [
461
+ {
462
+ "name": "stdout",
463
+ "output_type": "stream",
464
+ "text": [
465
+ "Question 1: What is the underlying message or theme of the text \"The Road Not Taken\"?\n",
466
+ "\n",
467
+ "(Note: The answer to this question will require the student to identify the key elements and important takeaways from the text in order to determine the underlying message or theme.)\n"
468
+ ]
469
+ }
470
+ ],
471
+ "source": [
472
+ "# Relationships example\n",
473
+ "tutor_q = get_tutoring_answer(None, qa_chain, assessment_request = SELF_STUDY_DEFAULTS['relationships'],\n",
474
+ " learning_objectives = learning_objs)\n",
475
+ "\n",
476
+ "print(tutor_q)"
477
+ ]
478
+ },
479
+ {
480
+ "cell_type": "markdown",
481
+ "metadata": {},
482
+ "source": [
483
+ "### Concepts and Definitions\n",
484
+ "\n",
485
+ "Prompt: Design a 5 question quiz that asks me about definitions related to the following learning objectives: {learning objectives} - based on {context}\".\n",
486
+ "Once I write out my response, provide me with your own response, highlighting why my answer is correct or incorrect."
487
+ ]
488
+ },
489
+ {
490
+ "cell_type": "code",
491
+ "execution_count": null,
492
+ "metadata": {},
493
+ "outputs": [
494
+ {
495
+ "name": "stdout",
496
+ "output_type": "stream",
497
+ "text": [
498
+ "Question 1: Based on the provided text, what is the underlying message or theme of the work?\n",
499
+ "\n",
500
+ "Please provide your response.\n"
501
+ ]
502
+ }
503
+ ],
504
+ "source": [
505
+ "# Concepts and definitions example\n",
506
+ "tutor_q = get_tutoring_answer(None, qa_chain, assessment_request = SELF_STUDY_DEFAULTS['concepts'],\n",
507
+ " learning_objectives = learning_objs)\n",
508
+ "\n",
509
+ "print(tutor_q)"
510
+ ]
511
+ },
512
+ {
513
+ "cell_type": "markdown",
514
+ "metadata": {},
515
+ "source": [
516
+ "### Real Word Examples\n",
517
+ "\n",
518
+ "Prompt: Demonstrate how {context} can be applied to solve a real-world problem related to the following learning objectives: {learning objectives}. Ask me questions regarding this theory/concept."
519
+ ]
520
+ },
521
+ {
522
+ "cell_type": "code",
523
+ "execution_count": null,
524
+ "metadata": {},
525
+ "outputs": [
526
+ {
527
+ "name": "stdout",
528
+ "output_type": "stream",
529
+ "text": [
530
+ "Based on the provided context, it seems that the extracted text is a poem by Robert Frost and does not directly provide any information or context related to problem-solving in the real world. Therefore, it may not be possible to demonstrate how the provided context can be applied to solve a real-world problem. However, I can still assess your understanding of the learning objectives mentioned. Let's start with the first learning objective: identifying the key elements of the work, important takeaways, and underlying message. \n",
531
+ "\n",
532
+ "Question 1: Based on your reading of the poem, what are some key elements or important takeaways that you can identify?\n"
533
+ ]
534
+ }
535
+ ],
536
+ "source": [
537
+ "# Real word example\n",
538
+ "tutor_q = get_tutoring_answer(None, qa_chain, assessment_request = SELF_STUDY_DEFAULTS['real_world_example'],\n",
539
+ " learning_objectives = learning_objs)\n",
540
+ "\n",
541
+ "print(tutor_q)"
542
+ ]
543
+ },
544
+ {
545
+ "cell_type": "markdown",
546
+ "metadata": {},
547
+ "source": [
548
+ "### Randomized Question Types\n",
549
+ "\n",
550
+ "Prompt: Please generate a high-quality assessment consisting of 5 varying questions, each of different types (open-ended, multiple choice, etc.), to determine if I achieved the following learning objectives in regards to {context}: {learning objectives}. If I answer incorrectly for any of the questions, please explain why my answer is incorrect."
551
+ ]
552
+ },
553
+ {
554
+ "cell_type": "code",
555
+ "execution_count": null,
556
+ "metadata": {},
557
+ "outputs": [
558
+ {
559
+ "name": "stdout",
560
+ "output_type": "stream",
561
+ "text": [
562
+ "Question 1 (Open-ended):\n",
563
+ "Based on the given excerpt, what do you think is the underlying message or theme of the text? Please provide a brief explanation to support your answer.\n",
564
+ "\n",
565
+ "(Note: The answer to this question will vary depending on the student's interpretation of the text. As the tutor, you can provide feedback on the strengths and weaknesses of their response, and guide them towards a deeper understanding of the text's message.)\n"
566
+ ]
567
+ }
568
+ ],
569
+ "source": [
570
+ "# Randomized question types\n",
571
+ "tutor_q = get_tutoring_answer(None, qa_chain, assessment_request = SELF_STUDY_DEFAULTS['randomized_questions'],\n",
572
+ " learning_objectives = learning_objs)\n",
573
+ "\n",
574
+ "print(tutor_q)"
575
+ ]
576
+ },
577
+ {
578
+ "cell_type": "markdown",
579
+ "metadata": {},
580
+ "source": [
581
+ "### Quantiative evaluation the correctness of a student's answer\n",
582
+ "\n",
583
+ "Prompt: (A continuation of the previous chat) Please generate the main points of the student’s answer to the previous question, and evaluate on a scale of 1 to 5 how comprehensive the student’s answer was in relation to the learning objectives, and explain why he or she received this rating, including what was missed in his or her answer if the student’s answer wasn’t complete.\n"
584
+ ]
585
+ },
586
+ {
587
+ "cell_type": "code",
588
+ "execution_count": null,
589
+ "metadata": {},
590
+ "outputs": [
591
+ {
592
+ "name": "stdout",
593
+ "output_type": "stream",
594
+ "text": [
595
+ "Main points of the student's answer:\n",
596
+ "- The underlying message of the text is that people should follow the crowd and take the easy way instead of the road less traveled.\n",
597
+ "- The road less traveled is hard and painful to traverse.\n",
598
+ "\n",
599
+ "Evaluation of the student's answer:\n",
600
+ "I would rate the student's answer a 2 out of 5 in terms of comprehensiveness in relation to the learning objectives. \n",
601
+ "\n",
602
+ "Explanation:\n",
603
+ "The student correctly identifies that the underlying message of the text is related to choosing between two paths, but their interpretation of the message is not entirely accurate. The student suggests that the text encourages people to follow the crowd and take the easy way, which is not supported by the actual message of the poem. The poem actually suggests that taking the road less traveled can make a significant difference in one's life. The student also mentions that the road less traveled is hard and painful to traverse, which is not explicitly stated in the text. This interpretation may be influenced by the student's personal perspective rather than the actual content of the poem. Therefore, the student's answer is not complete and does not fully grasp the intended message of the text.\n"
604
+ ]
605
+ }
606
+ ],
607
+ "source": [
608
+ "# qualitative evaluation\n",
609
+ "qualitative_query = \"\"\" Please generate the main points of the student’s answer to the previous question,\n",
610
+ " and evaluate on a scale of 1 to 5 how comprehensive the student’s answer was in relation to the learning objectives,\n",
611
+ " and explain why he or she received this rating, including what was missed in his or her answer if the student’s answer wasn’t complete.\"\"\"\n",
612
+ "\n",
613
+ "last_answer = (\"TUTOR QUESTION: Question 1 (Open-ended): \" +\n",
614
+ " \"Based on the given excerpt, what do you think is the underlying message or theme of the text? Please provide a \" + \n",
615
+ " \"brief explanation to support your answer.\\n\" + \n",
616
+ " \"STUDENT ANSWER: The underlying message of the text is that people should follow the crowd and the road less traveled is hard \"+\n",
617
+ " \"and painful to traverse. Take the easy way instead. \")\n",
618
+ "\n",
619
+ "# Note that this uses the previous result and query in the context\n",
620
+ "tutor_q = get_tutoring_answer(None, qa_chain, assessment_request = qualitative_query + '\\n' + last_answer,\n",
621
+ " learning_objectives = learning_objs,\n",
622
+ " input_kwargs = {'question':topic})\n",
623
+ "\n",
624
+ "print(tutor_q)"
625
+ ]
626
+ }
627
+ ],
628
+ "metadata": {
629
+ "kernelspec": {
630
+ "display_name": "python3",
631
+ "language": "python",
632
+ "name": "python3"
633
+ }
634
+ },
635
+ "nbformat": 4,
636
+ "nbformat_minor": 0
637
+ }
lo-achievement/prompt_with_vector_store_w_grading_intr.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
lo-achievement/settings.ini ADDED
@@ -0,0 +1,43 @@
1
+ [DEFAULT]
2
+ # All sections below are required unless otherwise specified.
3
+ # See https://github.com/fastai/nbdev/blob/master/settings.ini for examples.
4
+
5
+ ### Python library ###
6
+ repo = lo-achievement
7
+ lib_name = ai_classroom_suite
8
+ version = 0.0.1
9
+ min_python = 3.7
10
+ license = apache2
11
+ black_formatting = False
12
+
13
+ ### nbdev ###
14
+ doc_path = _docs
15
+ lib_path = ai_classroom_suite
16
+ nbs_path = nbs
17
+ recursive = True
18
+ tst_flags = notest
19
+ put_version_in_init = True
20
+
21
+ ### Docs ###
22
+ branch = main
23
+ custom_sidebar = False
24
+ doc_host = https://%(user)s.github.io
25
+ doc_baseurl = /%(repo)s
26
+ git_url = https://github.com/%(user)s/%(repo)s
27
+ title = %(lib_name)s
28
+
29
+ ### PyPI ###
30
+ audience = Developers
31
+ author = Charreau Bell
32
+ author_email = [email protected]
33
+ copyright = 2023 onwards, %(author)s
34
+ description = A repository supporting enhanced instruction and grading using AI
35
+ keywords = nbdev jupyter notebook python
36
+ language = English
37
+ status = 3
38
+ user = vanderbilt-data-science
39
+
40
+ ### Optional ###
41
+ requirements = langchain pandas numpy getpass openai gradio chromadb tiktoken unstructured pdf2image yt_dlp libmagic librosa deeplake ipyfilechooser
42
+ # dev_requirements =
43
+ # console_scripts =
lo-achievement/setup.py ADDED
@@ -0,0 +1,57 @@
1
+ from pkg_resources import parse_version
2
+ from configparser import ConfigParser
3
+ import setuptools, shlex
4
+ assert parse_version(setuptools.__version__)>=parse_version('36.2')
5
+
6
+ # note: all settings are in settings.ini; edit there, not here
7
+ config = ConfigParser(delimiters=['='])
8
+ config.read('settings.ini', encoding='utf-8')
9
+ cfg = config['DEFAULT']
10
+
11
+ cfg_keys = 'version description keywords author author_email'.split()
12
+ expected = cfg_keys + "lib_name user branch license status min_python audience language".split()
13
+ for o in expected: assert o in cfg, "missing expected setting: {}".format(o)
14
+ setup_cfg = {o:cfg[o] for o in cfg_keys}
15
+
16
+ licenses = {
17
+ 'apache2': ('Apache Software License 2.0','OSI Approved :: Apache Software License'),
18
+ 'mit': ('MIT License', 'OSI Approved :: MIT License'),
19
+ 'gpl2': ('GNU General Public License v2', 'OSI Approved :: GNU General Public License v2 (GPLv2)'),
20
+ 'gpl3': ('GNU General Public License v3', 'OSI Approved :: GNU General Public License v3 (GPLv3)'),
21
+ 'bsd3': ('BSD License', 'OSI Approved :: BSD License'),
22
+ }
23
+ statuses = [ '1 - Planning', '2 - Pre-Alpha', '3 - Alpha',
24
+ '4 - Beta', '5 - Production/Stable', '6 - Mature', '7 - Inactive' ]
25
+ py_versions = '3.6 3.7 3.8 3.9 3.10'.split()
26
+
27
+ requirements = shlex.split(cfg.get('requirements', ''))
28
+ if cfg.get('pip_requirements'): requirements += shlex.split(cfg.get('pip_requirements', ''))
29
+ min_python = cfg['min_python']
30
+ lic = licenses.get(cfg['license'].lower(), (cfg['license'], None))
31
+ dev_requirements = (cfg.get('dev_requirements') or '').split()
32
+
33
+ setuptools.setup(
34
+ name = cfg['lib_name'],
35
+ license = lic[0],
36
+ classifiers = [
37
+ 'Development Status :: ' + statuses[int(cfg['status'])],
38
+ 'Intended Audience :: ' + cfg['audience'].title(),
39
+ 'Natural Language :: ' + cfg['language'].title(),
40
+ ] + ['Programming Language :: Python :: '+o for o in py_versions[py_versions.index(min_python):]] + (['License :: ' + lic[1] ] if lic[1] else []),
41
+ url = cfg['git_url'],
42
+ packages = setuptools.find_packages(),
43
+ include_package_data = True,
44
+ install_requires = requirements,
45
+ extras_require={ 'dev': dev_requirements },
46
+ dependency_links = cfg.get('dep_links','').split(),
47
+ python_requires = '>=' + cfg['min_python'],
48
+ long_description = open('README.md', encoding='utf-8').read(),
49
+ long_description_content_type = 'text/markdown',
50
+ zip_safe = False,
51
+ entry_points = {
52
+ 'console_scripts': cfg.get('console_scripts','').split(),
53
+ 'nbdev': [f'{cfg.get("lib_path")}={cfg.get("lib_path")}._modidx:d']
54
+ },
55
+ **setup_cfg)
56
+
57
+
lo-achievement/speech_to_text_models.ipynb ADDED
The diff for this file is too large to render. See raw diff