From 0dcabd828638f4782db8485c687685623586605f Mon Sep 17 00:00:00 2001 From: mribary <m.ribary@surrey.ac.uk> Date: Mon, 21 Dec 2020 17:26:36 +0000 Subject: [PATCH] Requirements for python venv --- .gitignore | 3 +- ..._lemmatext.ipynb => demo_001_lemmas.ipynb} | 75 ++++++++++--------- requirements.txt | 11 +++ 3 files changed, 51 insertions(+), 38 deletions(-) rename demo/{demo_001_lemmatext.ipynb => demo_001_lemmas.ipynb} (89%) create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore index 82fda43..2acfbc4 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ _Work_in_progress/ _Scholarship/ _Notes/ -_Drafts/ \ No newline at end of file +_Drafts/ +.venv \ No newline at end of file diff --git a/demo/demo_001_lemmatext.ipynb b/demo/demo_001_lemmas.ipynb similarity index 89% rename from demo/demo_001_lemmatext.ipynb rename to demo/demo_001_lemmas.ipynb index 8f5ae17..eefd28b 100644 --- a/demo/demo_001_lemmatext.ipynb +++ b/demo/demo_001_lemmas.ipynb @@ -1,19 +1,34 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "##1 Getting text ready for vectorization" - ] + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5-final" }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3", + "language": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "# Import packages and models from cltk and initialize tools\n", + " # Import packages and models from cltk and initialize tools\n", "from cltk.corpus.utils.importer import CorpusImporter\n", "from cltk.lemmatize.latin.backoff import BackoffLatinLemmatizer\n", "corpus_importer = CorpusImporter('latin')\n", @@ -36,8 +51,8 @@ "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "hoc igitur ius nostrum constat aut ex scripto aut sine scripto ut apud graecos\n" ] @@ -60,8 +75,8 @@ "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "['hoc', 'igitur', 'ius', 'nostrum', 'constat', 'aut', 'ex', 'scripto', 'aut', 'sine', 'scripto', 'ut', 'apud', 'graecos']\n" ] @@ -78,8 +93,8 @@ "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "[('hoc', 'hic'), ('igitur', 'igitur'), ('ius', 'ius'), ('nostrum', 'nos'), ('constat', 'consto'), ('aut', 'aut'), ('ex', 'ex'), ('scripto', 'scribo'), ('aut', 'aut'), ('sine', 'sine'), ('scripto', 'scribo'), ('ut', 'ut'), ('apud', 'apud'), ('graecos', 'graecus')]\n" ] @@ -92,23 +107,23 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", - "path_stoplist = '/home/mribary/Dropbox/pyDigest/dump/D_stoplist_001.txt'\n", + "path_stoplist = '/home/mribary/OneDrive/Git/pydigest/dump/D_stoplist_001.txt'\n", "stopwords = list(pd.read_csv(path_stoplist, header=None)[0]) # 57 custom stopwords" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 8, "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "igitur ius nos consto scribo scribo apud graecus\n" ] @@ -125,27 +140,13 @@ "textunit = textunit.strip()\n", "print(textunit)" ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.2" + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + ] +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..7b4a4a2 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,11 @@ +cltk==0.1.117 +fasttext==0.9.2 +gensim==3.8.3 +matplotlib==3.2.1 +nltk==3.5 +numpy==1.18.4 +pandas==1.0.3 +regex==2020.4.4 +scikit-learn==0.22.2.post1 +scipy==1.4.1 +seaborn==0.10.1 \ No newline at end of file -- GitLab