def tmp_download(url):
    """
    Download the file at ``url`` into a fresh temporary directory.

    A new, uniquely named directory is created with ``tempfile.mkdtemp()``
    inside the system's temporary folder (``/tmp`` on most Linux systems,
    unless overridden, e.g. through ``TMPDIR``). Whether and when that
    folder is cleared (for instance at the next reboot) depends on system
    settings, so move the file elsewhere if it must be kept.

    The local file is named after the last path segment of the URL; a
    query string or fragment is ignored, and ``download`` is used when the
    URL path has no final segment. Progress is printed on a single,
    continuously rewritten console line.

    Parameters
    ----------
    url : str
        Address of the file to download.

    Returns
    -------
    str
        Path of the downloaded file inside the temporary directory.

    Raises
    ------
    requests.exceptions.RequestException
        If the request fails or the server answers with an HTTP error
        status. The temporary directory is removed again in that case.
    """
    import os
    import shutil
    import tempfile
    from urllib.parse import urlsplit

    import requests

    # Use only the path component so that "?query" / "#fragment" parts do
    # not end up in the file name; fall back to a fixed name when the URL
    # ends with a slash and therefore has no final segment.
    file_name = os.path.basename(urlsplit(url).path) or 'download'

    # mkdtemp() picks the platform's temp folder and creates a unique
    # directory atomically, replacing the hard-coded "/tmp" + random name.
    temp_dir = tempfile.mkdtemp()
    download_path = os.path.join(temp_dir, file_name)

    try:
        # A single streamed request supplies both the headers and the body,
        # and the context manager guarantees the connection is released.
        with requests.get(url, stream=True) as response:
            response.raise_for_status()

            # Content-Length is optional (e.g. chunked transfer encoding);
            # treat a missing or malformed value as "size unknown".
            try:
                total_size = int(response.headers.get('Content-Length', 0))
            except ValueError:
                total_size = 0

            received = 0
            with open(download_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
                    # len() is the payload size; sys.getsizeof() would also
                    # count the per-object overhead of the bytes instance.
                    received += len(chunk)
                    if total_size > 0:
                        # Clamp at zero: decoded (e.g. gzip) content can be
                        # larger than the advertised Content-Length.
                        remaining = max(total_size - received, 0)
                        progress = '{:.2f}'.format(
                            remaining / total_size * 100
                        ) + '%' + ' remaining '
                    else:
                        progress = '{} bytes received '.format(received)
                    print(file_name + ' : ' + progress, end='\r')
    except BaseException:
        # Do not leave a partial file behind (also on Ctrl-C); the original
        # exception is re-raised unchanged for the caller.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise

    print('\nDownload is complete\nThe file is available at:\n' + str(download_path))
    print('\nMove the file to a permanent location, if you wish to keep it.\n')
    return download_path
bdbefd7..1ef60f2 100644 --- a/pyDigest_documentation.md +++ b/pyDigest_documentation.md @@ -98,4 +98,10 @@ The function takes a list of strings in Latin and returns a list of lemmas for t `stopwords`: list of words to be removed -**Example for use**: `wordvec_001.py`, to create a txt file required for word vector calculations with `fasttext` \ No newline at end of file +**Example for use**: `wordvec_001.py`, to create a txt file required for word vector calculations with `fasttext` + +### 5. `tmp_download(url)` + +The function downloads a file from an online repository to the system's temporary folder `/tmp`. It takes the URL of the file in the repository as an input and returns a temporary path on the local machine where the file is downloaded. + +**Example for use**: `wordvec_xx.py`, to download and load word vector models for the fasttext module. \ No newline at end of file -- GitLab