- scikit-learn - numpy

PyScript Experiments

Fun with PyScript...

Topic Modeling with PyScript

Enter each text statement on a new line
Human machine interface for Lab ABC computer applications A survey of user opinion of computer system response time The EPS user interface management system System and human system engineering testing of EPS Relation of user-perceived response time to error measurement The generation of random, binary, unordered trees The intersection graph of paths in trees Graph minors IV: Widths of trees and quasi-ordering Graph minors: A survey

Number of Topics

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import NMF
import numpy as np

# Page elements, looked up by id through the PyScript-provided ``Element``
# helper: the input text area, the output area, and the topic-count input.
text_collection_element = Element("text_collection")
topics_element = Element("topics")
no_topics_element = Element("no_topics")


def display_topics(H, W, feature_names, documents, no_top_words, no_top_documents):
    """Format the top words and top documents of every topic as text.

    Args:
        H: 2-D array iterated row by row, one row per topic, holding a weight
            per feature (``find_topics`` passes NMF's ``components_``).
        W: 2-D array indexed as ``W[:, topic_idx]``, holding a weight per
            document for each topic (``find_topics`` passes the NMF
            transform of the TF-IDF matrix).
        feature_names: Sequence mapping a feature index to its word.
        documents: The original text statements, indexed like the rows of W.
        no_top_words: How many highest-weighted words to list per topic.
        no_top_documents: How many highest-weighted documents to list per
            topic.

    Returns:
        A single string: for each topic a ``Topic N: word word ...`` line,
        then the top documents one per line, then a ``"\\n \\n"`` separator.
    """
    # Collect the pieces and join once instead of growing a string with +=.
    pieces = []
    for topic_idx, topic in enumerate(H):
        # argsort is ascending, so walk it backwards to take the largest
        # `no_top_words` weights first.
        top_words = [feature_names[i] for i in topic.argsort()[:-no_top_words - 1:-1]]
        pieces.append("Topic %d:" % (topic_idx) + " ")
        pieces.append(" ".join(top_words))
        pieces.append("\n")

        # Same idea for documents: reverse the ascending order, keep the head.
        top_doc_indices = np.argsort(W[:, topic_idx])[::-1][0:no_top_documents]
        for doc_index in top_doc_indices:
            pieces.append(documents[doc_index] + "\n")

        # NOTE(review): separator assumed to follow each topic's section; the
        # original indentation was lost when the script was flattened.
        pieces.append("\n \n")
    return "".join(pieces)


def find_topics(*args, **kwargs):
    """Run NMF topic modeling on the text area contents and show the result.

    Reads the requested number of topics and the newline-separated text
    statements from the page, fits a TF-IDF + NMF model, and writes the
    formatted topics to the output element. Any positional or keyword
    arguments are accepted and ignored.

    Invalid input (a topic count that is not a positive whole number, no
    non-blank statements, or input scikit-learn rejects with ``ValueError``)
    is reported as a message in the output element instead of raising.
    """
    try:
        no_topics = int(no_topics_element.value)
    except (TypeError, ValueError):
        topics_element.write("Number of topics must be a whole number.")
        return
    if no_topics < 1:
        topics_element.write("Number of topics must be at least 1.")
        return

    # One statement per line; blank and whitespace-only lines are dropped.
    raw_lines = text_collection_element.value.split("\n")
    documents = [line.strip() for line in raw_lines if line.strip()]
    if not documents:
        topics_element.write("Enter at least one text statement.")
        return

    try:
        tfidf_vectorizer = TfidfVectorizer(stop_words='english')
        tfidf = tfidf_vectorizer.fit_transform(documents)
        tfidf_feature_names = tfidf_vectorizer.get_feature_names_out()

        nmf = NMF(n_components=no_topics, init='nndsvd').fit(tfidf)
        nmf_W = nmf.transform(tfidf)
    except ValueError as err:
        # scikit-learn signals unusable input this way (for instance when only
        # stop words remain, or when more topics are requested than the data
        # supports with init='nndsvd'); show the reason rather than crash.
        topics_element.write("Could not find topics: %s" % err)
        return
    nmf_H = nmf.components_

    no_top_words = 4
    no_top_documents = 2

    output = display_topics(nmf_H, nmf_W, tfidf_feature_names, documents,
                            no_top_words, no_top_documents)
    topics_element.write(output)

Back to list of PyScript Experiments