def tmp_download(url):
    """
    Download the file at ``url`` into a freshly created temporary directory.

    A uniquely named directory is created inside the system's temporary
    folder (``/tmp`` on most Unix-like systems) and the response body is
    streamed into it in 8 KiB chunks while a progress line is printed.
    Whether and when the temporary folder is cleared (for example at the
    next reboot) depends on system settings, so move the file elsewhere if
    it has to be kept.

    Parameters
    ----------
    url : str
        Address of the file to download.

    Returns
    -------
    str
        Path of the downloaded file on the local machine.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status code.
    requests.RequestException
        If the connection fails or is interrupted.
    OSError
        If the temporary directory or the file cannot be written.
    """
    import os
    import shutil
    import tempfile
    from urllib.parse import urlsplit

    import requests

    # Name the local file after the last path segment of the URL. The query
    # string and fragment are ignored, and a generic name is used when the
    # URL path ends with a slash (otherwise open() would target a directory).
    base_file = os.path.basename(urlsplit(url).path) or 'download'

    # A single request serves both the headers and the body; the context
    # manager guarantees that the connection is released.
    with requests.get(url, stream=True) as r:
        r.raise_for_status()

        # Content-Length is optional (e.g. with chunked transfer encoding).
        try:
            total_size = int(r.headers.get('Content-Length', 0))
        except (TypeError, ValueError):
            total_size = 0

        # mkdtemp creates a collision-free, private directory in the
        # platform's temporary folder.
        temp_dir = tempfile.mkdtemp()
        download_path = os.path.join(temp_dir, base_file)

        try:
            downloaded = 0
            with open(download_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
                    # len() is the payload size; sys.getsizeof() would also
                    # count the overhead of the bytes object.
                    downloaded += len(chunk)
                    if total_size > 0:
                        # Transparent decompression can deliver more bytes
                        # than Content-Length announced, hence the clamp.
                        remaining = max(total_size - downloaded, 0)
                        percent = remaining / total_size * 100
                        print(f'{base_file} : {percent:.2f}% remaining ',
                              end='\r')
                    else:
                        print(f'{base_file} : {downloaded} bytes downloaded ',
                              end='\r')
        except BaseException:
            # Do not leave a partial download behind, then re-raise.
            shutil.rmtree(temp_dir, ignore_errors=True)
            raise

    print('\nDownload is complete\nThe file is available at:\n' + str(download_path))
    print('\nMove the file to a permanent location, if you wish to keep it.\n')
    return download_path
removed -**Example for use**: `wordvec_001.py`, to create a txt file required for word vector calculations with `fasttext` \ No newline at end of file +**Example for use**: `wordvec_001.py`, to create a txt file required for word vector calculations with `fasttext` + +### 5. `tmp_download(url)` + +The function downloads a file from an online repository to the system's temporary folder `/tmp`. It takes the URL of the file in the repository as input and returns the path on the local machine where the downloaded file is stored. + +**Example for use**: `wordvec_xx.py`, to download and load word vector models for the `fasttext` module. \ No newline at end of file