diff --git a/NLP_documetation.md b/NLP_documetation.md
index 59099b29bcb674f8ba7259b935ea8251980d1a6c..667757421bc487d2046f0f82ffeb3240f8df5895 100644
--- a/NLP_documetation.md
+++ b/NLP_documetation.md
@@ -96,9 +96,9 @@ _*Rerun and rewrite* Clusters produced at cuts specified in the previous step ar
 
 `K-means_silhouette_norm_top50.py > silhouette_scores_norm_top50.txt, norm_top50_silhouette_2to75.png`
 
-The script loads the normalized dataframe (340 theamtic sections, top 50 lemmas only) and the Tfidf matrices of sections and titles. In order to determine the number of clusters (K), the script gets the silhouette scores for clustering between a range of 2 and 75. The silhouette score measures the inner density of clusters against the outer distance between them, so that a score close 1 means perfect and 0 means totally unreliable clustering. The score 1 can only be achieved when the number of clusters (K) is equal to the number of samples being clustered.
+The script loads the normalized dataframe (339 thematic sections, top 50 lemmas only) and the Tfidf matrices of sections and titles. In order to determine the number of clusters (K), the script computes silhouette scores for clusterings in the range of 2 to 75 clusters. The silhouette score measures the inner density of clusters against the outer distance between them, so that a score close to 1 means perfect and 0 means totally unreliable clustering. The score 1 can only be achieved when the number of clusters (K) is equal to the number of samples being clustered.
 
-Silhouette scores take a long time to compute, beacuse the K-means algorithm approximates its result in multiple iterations which are here set to 300. As the algorithm starts from a random state and iterations are stopped at 300, running the algorithm multiple times procduces different results. After the fifth running, the silhouette score suggests that the optimal number of clusters is 54 at a score of 0.0666. The graph below shows how the silhouette score changes as we cluster datapoints in the range between 2 and 75.
+Silhouette scores take a long time to compute, because the K-means algorithm approximates its result in multiple iterations which are here set to 300. As the algorithm starts from a random state and iterations are stopped at 300, running the algorithm multiple times produces different results. After the fifth run, the silhouette score suggests that the optimal number of clusters is 61 at a score of 0.0707. The graph below shows how the silhouette score changes as we cluster datapoints in the range between 2 and 75.
 
 ![Silhouette graph](https://github.com/mribary/pyDigest/blob/master/images/norm_top50_silhouette_2to75.png)
 
diff --git a/dump/silhouette_scores_norm_top50.txt b/dump/silhouette_scores_norm_top50.txt
index 93b69b0ccf6286514cb8c90582751b6c8d1a725c..f9d69c3dc79feed177612c6374b986fef2d169a7 100644
Binary files a/dump/silhouette_scores_norm_top50.txt and b/dump/silhouette_scores_norm_top50.txt differ
diff --git a/images/norm_top50_silhouette_2to75.png b/images/norm_top50_silhouette_2to75.png
index 061674c0a5319522c7a637e747b9063ceb028874..61e0242aacf71abf95db1111f0ff116e0224a5f2 100644
Binary files a/images/norm_top50_silhouette_2to75.png and b/images/norm_top50_silhouette_2to75.png differ
diff --git a/script/K-means_norm_top50_001.py b/script/K-means_norm_top50_001.py
index 96fabfa9e0c859c072be610c6334d3ecb1beae57..2f156cd354d3e2475fb525d7ce5e8a907245e1af 100644
--- a/script/K-means_norm_top50_001.py
+++ b/script/K-means_norm_top50_001.py
@@ -11,9 +11,9 @@ sf = pd.read_csv('./dump/tfidf_sections_norm_top50.csv', index_col=0)
 tf = pd.read_csv('./dump/tfidf_titles_norm.csv', index_col=0)
 
 # Extract matrix from dataframe
-X = np.array(sf.values)         # Tfidf matrix of shape 340 (sections) x 3868 (terms)
+X = np.array(sf.values)         # Tfidf matrix: rows = sections, columns = terms (actual shape printed below)
 section_IDs = list(sf.index)    # List for section_IDs
-# X.shape
+print(X.shape)
 
 # Generate silhouette scores for the range between 2 and 75 clusters
 NumberOfClusters=range(2,75)