Requirements for python venv

0dcabd82 · Ribary, Marton Dr (School of Law) · 03649db0 · 0dcabd82 · 0dcabd82 · 0dcabd82
Commit 0dcabd82 authored 4 years ago by Ribary, Marton Dr (School of Law)
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,5 @@
 _Work_in_progress/
 _Scholarship/
 _Notes/
 _Drafts/
\ No newline at end of file
+.venv
\ No newline at end of file
--- a/demo/demo_001_lemmatext.ipynb
+++ b/demo/demo_001_lemmatext.ipynb
 {
- "cells": [
+ "metadata": {
-  {
+  "language_info": {
-   "cell_type": "markdown",
+   "codemirror_mode": {
-   "metadata": {},
+    "name": "ipython",
-   "source": [
+    "version": 3
-    "##1 Getting text ready for vectorization"
+   },
-   ]
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5-final"
  },
+  "orig_nbformat": 2,
+  "kernelspec": {
+   "name": "python3",
+   "display_name": "Python 3",
+   "language": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2,
+ "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
-    "# Import packages and models from cltk and initialize tools\n",
+    " # Import packages and models from cltk and initialize tools\n",
    "from cltk.corpus.utils.importer import CorpusImporter\n",
    "from cltk.lemmatize.latin.backoff import BackoffLatinLemmatizer\n",
    "corpus_importer = CorpusImporter('latin')\n",
@@ -36,8 +51,8 @@
   "metadata": {},
   "outputs": [
    {
-     "name": "stdout",
     "output_type": "stream",
+     "name": "stdout",
     "text": [
      "hoc igitur ius nostrum constat aut ex scripto aut sine scripto ut apud graecos\n"
     ]
@@ -60,8 +75,8 @@
   "metadata": {},
   "outputs": [
    {
-     "name": "stdout",
     "output_type": "stream",
+     "name": "stdout",
     "text": [
      "['hoc', 'igitur', 'ius', 'nostrum', 'constat', 'aut', 'ex', 'scripto', 'aut', 'sine', 'scripto', 'ut', 'apud', 'graecos']\n"
     ]
@@ -78,8 +93,8 @@
   "metadata": {},
   "outputs": [
    {
-     "name": "stdout",
     "output_type": "stream",
+     "name": "stdout",
     "text": [
      "[('hoc', 'hic'), ('igitur', 'igitur'), ('ius', 'ius'), ('nostrum', 'nos'), ('constat', 'consto'), ('aut', 'aut'), ('ex', 'ex'), ('scripto', 'scribo'), ('aut', 'aut'), ('sine', 'sine'), ('scripto', 'scribo'), ('ut', 'ut'), ('apud', 'apud'), ('graecos', 'graecus')]\n"
     ]
@@ -92,23 +107,23 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
-    "path_stoplist = '/home/mribary/Dropbox/pyDigest/dump/D_stoplist_001.txt'\n",
+    "path_stoplist = '/home/mribary/OneDrive/Git/pydigest/dump/D_stoplist_001.txt'\n",
    "stopwords = list(pd.read_csv(path_stoplist, header=None)[0])  # 57 custom stopwords"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
-     "name": "stdout",
     "output_type": "stream",
+     "name": "stdout",
     "text": [
      "igitur ius nos consto scribo scribo apud graecus\n"
     ]
@@ -125,27 +140,13 @@
    "textunit = textunit.strip()\n",
    "print(textunit)"
   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
  },
-  "language_info": {
+  {
-   "codemirror_mode": {
+   "cell_type": "code",
-    "name": "ipython",
+   "execution_count": null,
-    "version": 3
+   "metadata": {},
-   },
+   "outputs": [],
-   "file_extension": ".py",
+   "source": []
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.2"
  }
- },
+ ]
- "nbformat": 4,
+}
- "nbformat_minor": 2
\ No newline at end of file
-}
-%% Cell type:markdown id: tags:
-##1 Getting text ready for vectorization
 %% Cell type:code id: tags:
 ``` python
-# Import packages and models from cltk and initialize tools
+ # Import packages and models from cltk and initialize tools
 from cltk.corpus.utils.importer import CorpusImporter
 from cltk.lemmatize.latin.backoff import BackoffLatinLemmatizer
 corpus_importer = CorpusImporter('latin')
 # NOTE(review): presumably fetches the 'latin_models_cltk' data that the
 # lemmatizer relies on, so it has to run before the lemmatizer is built -
 # confirm against the CLTK documentation for the pinned version.
 corpus_importer.import_corpus('latin_models_cltk')
 # Lemmatizer instance reused by the lemmatization cell further down.
 lemmatizer = BackoffLatinLemmatizer()
 ```
 %% Cell type:code id: tags:
 ``` python
 # Sample passage: a Latin sentence followed by a Greek quotation (non-ASCII characters).
 text = 'Hoc igitur ius nostrum constat aut ex scripto aut sine scripto, ut apud Graecos: τῶν νόμων οἱ μὲν ἔγγραφοι, οἱ δὲ ἄγραφοι .'
 ```
 %% Cell type:code id: tags:
 ``` python
 import re
 # Character class of punctuation to delete. Inside the class, `,-/` is a
 # range that spans the four characters , - . / and the guillemets can never
 # match here because non-ASCII characters are dropped before the substitution.
 punctuation = r"[\"#$%&\'()*+,-/:;<=>@[\]^_`{|}~.?!«»]"
 # Keep printable ASCII only (code points 32..126); this discards control
 # characters and the whole Greek quotation.
 new_text = ''.join(ch for ch in text if 32 <= ord(ch) <= 126)
 # Strip punctuation, then squeeze the runs of blanks left behind.
 text_no_punct = re.compile(punctuation).sub('', new_text)
 text_one_white_space = re.compile(r"\s{2,}").sub(' ', text_no_punct)
 # Trim both ends and normalise case for the lemmatizer.
 text_no_trailing_space = text_one_white_space.strip()
 text_lower = text_no_trailing_space.lower()
 print(text_lower)
 ```
 %% Output
    hoc igitur ius nostrum constat aut ex scripto aut sine scripto ut apud graecos
 %% Cell type:code id: tags:
 ``` python
 # Tokenise on single spaces; the cleaning cell already collapsed repeated
 # whitespace and trimmed both ends, so no empty tokens are produced.
 text_split = text_lower.split(' ')
 print(text_split)
 ```
 %% Output
    ['hoc', 'igitur', 'ius', 'nostrum', 'constat', 'aut', 'ex', 'scripto', 'aut', 'sine', 'scripto', 'ut', 'apud', 'graecos']
 %% Cell type:code id: tags:
 ``` python
 # Lemmatize the token list; as the printed output shows, each item of the
 # result is a (token, lemma) pair.
 lemmas = lemmatizer.lemmatize(text_split)
 print(lemmas)
 ```
 %% Output
    [('hoc', 'hic'), ('igitur', 'igitur'), ('ius', 'ius'), ('nostrum', 'nos'), ('constat', 'consto'), ('aut', 'aut'), ('ex', 'ex'), ('scripto', 'scribo'), ('aut', 'aut'), ('sine', 'sine'), ('scripto', 'scribo'), ('ut', 'ut'), ('apud', 'apud'), ('graecos', 'graecus')]
 %% Cell type:code id: tags:
 ``` python
 import pandas as pd
-path_stoplist = '/home/mribary/Dropbox/pyDigest/dump/D_stoplist_001.txt'
+path_stoplist = '/home/mribary/OneDrive/Git/pydigest/dump/D_stoplist_001.txt'
 stopwords = list(pd.read_csv(path_stoplist, header=None)[0])  # 57 custom stopwords
 ```
 %% Cell type:code id: tags:
 ``` python
 # Build the text unit from the lemma (second item) of every pair, skipping
 # lemmas found in the stoplist; with no stoplist (None) every lemma is kept.
 kept_lemmas = [
     pair[1]
     for pair in lemmas
     if stopwords is None or pair[1] not in stopwords
 ]
 textunit = ' '.join(kept_lemmas)
 textunit = textunit.strip()
 print(textunit)
 ```
 %% Output
    igitur ius nos consto scribo scribo apud graecus
+%% Cell type:code id: tags:
+``` python
+```

--- a/requirements.txt
+++ b/requirements.txt
+cltk==0.1.117
+fasttext==0.9.2
+gensim==3.8.3
+matplotlib==3.2.1
+nltk==3.5
+numpy==1.18.4
+pandas==1.0.3
+regex==2020.4.4
+scikit-learn==0.22.2.post1
+scipy==1.4.1
+seaborn==0.10.1
\ No newline at end of file