Given a book of words and an integer K. Assume you have enough main memory to accommodate all words. Design a dynamic data structure to find the top K most frequent words in a book. The structure should allow new words to be added in main memory.
Examples:
Input: fileData = "Welcome to the world of Geeks. This portal has been created to provide well written well thought and well explained solutions for selected questions If you like Geeks for Geeks and would like to contribute here is your chance You can write article and mail your article to contribute at geeksforgeeks org See your article appearing on the Geeks for Geeks main page and help thousands of other Geeks"
Output:
"your" : 3
"well" : 3
"and" : 4
"to" : 4
"Geeks" : 6
Using Hash Map and Heap
- Store all words and their frequencies in a hash map.
- Store top k frequent items in a min heap (Please refer k Largest Elements in an Array for details)
- Print the words and their frequencies in the decreasing order of frequencies.
Important Points about Implementations
- In Python, we have a direct function most_common()
- In JavaScript, we do not have direct implementation of min heap, so we have used sorting.
#include <bits/stdc++.h>
using namespace std;
// Prints the k most frequent whitespace-separated words of `text` to stdout,
// one per line as "<word> : <count>", most frequent first. Words with equal
// counts are ordered by descending string comparison.
//
// text: input to tokenize on whitespace.
// k:    maximum number of words to report; k <= 0 prints nothing.
void processText(const string& text, int k) {
    // Without this guard a negative k is converted to a huge unsigned value
    // in the size comparison below, so the heap is never trimmed and every
    // word would be printed.
    if (k <= 0) return;

    // Count the occurrences of every word.
    std::unordered_map<std::string, int> freqMap;
    std::istringstream iss(text);
    for (std::string word; iss >> word;) ++freqMap[word];

    // Min-heap keyed on (count, word): the smallest entry sits on top, so
    // popping whenever the heap outgrows k leaves the k largest entries.
    typedef std::pair<int, std::string> Entry;
    std::priority_queue<Entry, std::vector<Entry>, std::greater<Entry>> pq;
    const std::size_t limit = static_cast<std::size_t>(k);
    for (const auto& item : freqMap) {  // by reference: no per-word copy
        pq.emplace(item.second, item.first);
        if (pq.size() > limit) pq.pop();
    }

    // Drain the heap (ascending order) ...
    std::vector<Entry> res;
    res.reserve(pq.size());
    while (!pq.empty()) {
        res.push_back(pq.top());
        pq.pop();
    }

    // ... and print back-to-front to get descending order.
    for (auto it = res.rbegin(); it != res.rend(); ++it)
        std::cout << it->second << " : " << it->first << '\n';
}
int main() {
    // An in-memory string stands in for a file so the example can be
    // compiled and run without any external data.
    const string sample = "Welcome to the world of Geeks Geeks for Geeks is great";
    const int topCount = 5;
    processText(sample, topCount);

    // To analyse a file instead, open it with ifstream, report an error
    // and return 1 if it cannot be opened, read its contents into a
    // string and pass that string to processText.
    return 0;
}
import java.util.*;
import java.io.*;
public class Main {
public static void processText(String text, int k) {
// Store Frequencies of all words
Map<String, Integer> freqMap = new HashMap<>();
String[] words = text.split(" ");
for (String word : words) {
freqMap.put(word, freqMap.getOrDefault(word, 0) + 1);
}
// Store frequency map items in a priority queue (or min heap)
// with frequency as key
PriorityQueue<Map.Entry<String, Integer>> pq = new PriorityQueue<>(
(a, b) -> a.getValue() - b.getValue()
);
for (Map.Entry<String, Integer> entry : freqMap.entrySet()) {
pq.offer(entry);
if (pq.size() > k) pq.poll();
}
// Get the top frequency items
List<Map.Entry<String, Integer>> res = new ArrayList<>();
while (!pq.isEmpty()) {
res.add(pq.poll());
}
// Reverse to get the desired order
Collections.reverse(res);
for (Map.Entry<String, Integer> entry : res)
System.out.println(entry.getKey() + " : " + entry.getValue());
}
public static void main(String[] args) {
String text = "Welcome to the world of Geeks Geeks for Geeks is great";
processText(text, 5);
// to read from file
// try (Scanner file = new Scanner(new File("file.txt"))) {
// printKMostFreq(file, k);
// }
// using string instead of file to
// test and run the code
}
}
from collections import Counter
def process_text(text, k):
    """Print the k most common whitespace-separated words of ``text``.

    Each line has the form ``word : count``; the order is the one
    returned by ``Counter.most_common``.
    """
    tokens = text.split()
    word_counts = Counter(tokens)
    top_entries = word_counts.most_common(k)
    for entry in top_entries:
        word, freq = entry
        print(f'{word} : {freq}')
if __name__ == '__main__':
    # An in-memory string stands in for a file so the example runs
    # without external data.
    sample_text = 'Welcome to the world of Geeks Geeks for Geeks is great'
    process_text(sample_text, 5)
    # To analyse a file instead, open it with open('file.txt', 'r'),
    # read its contents into a string and pass that to process_text.
using System;
using System.Collections.Generic;
using System.Linq;
class MainClass {
    /// <summary>
    /// Prints the k most frequent words of <paramref name="text"/>, one per
    /// line as "word : count", most frequent first.
    /// </summary>
    /// <param name="text">Input; words are separated by whitespace.</param>
    /// <param name="k">Maximum number of words to print; k &lt;= 0 prints nothing.</param>
    public static void ProcessText(string text, int k) {
        // Count occurrences. A null separator array splits on whitespace and
        // RemoveEmptyEntries keeps repeated spaces from producing empty words.
        var freqMap = new Dictionary<string, int>();
        foreach (string token in text.Split((char[])null, StringSplitOptions.RemoveEmptyEntries)) {
            freqMap.TryGetValue(token, out int seen);
            freqMap[token] = seen + 1;
        }

        // PriorityQueue dequeues the LOWEST priority first, so using the
        // count itself as priority gives a min-heap: dequeuing whenever the
        // queue outgrows k discards the least frequent word and keeps the
        // k most frequent ones. (Negating the count would evict the most
        // frequent word instead.)
        var pq = new PriorityQueue<string, int>();
        foreach (var entry in freqMap) {
            pq.Enqueue(entry.Key, entry.Value);
            if (pq.Count > k) pq.Dequeue();
        }

        // Drain in ascending order; TryDequeue yields both the word and its
        // priority (Dequeue alone returns only the word).
        var res = new List<KeyValuePair<string, int>>();
        while (pq.TryDequeue(out string topWord, out int topCount)) {
            res.Add(new KeyValuePair<string, int>(topWord, topCount));
        }
        res.Reverse(); // highest frequency first

        foreach (var entry in res)
            Console.WriteLine(entry.Key + " : " + entry.Value);
    }

    public static void Main(string[] args) {
        string text = "Welcome to the world of Geeks Geeks for Geeks is great";
        ProcessText(text, 5);
    }
}
/**
 * Prints the k most frequent words of `text`, one per line as
 * "word : count", most frequent first.
 *
 * @param {string} text Input; words are separated by runs of whitespace.
 * @param {number} k    Maximum number of words to print; k <= 0 prints nothing.
 */
function processText(text, k) {
    // A Map is used instead of a plain object so that words such as
    // "constructor" or "toString" do not collide with properties
    // inherited from Object.prototype.
    const freqMap = new Map();
    for (const word of text.split(/\s+/)) {
        if (word === '') continue; // leading/trailing whitespace artefact
        freqMap.set(word, (freqMap.get(word) || 0) + 1);
    }

    // Sort ascending by count; the top entries are then at the end.
    const sortedWords = [...freqMap.entries()].sort((a, b) => a[1] - b[1]);

    // slice(-k) would return the whole array for k = 0 (because -0 === 0),
    // so compute the start index explicitly.
    const wanted = Math.max(0, Math.min(k, sortedWords.length));
    const res = sortedWords.slice(sortedWords.length - wanted).reverse();

    for (const [word, freq] of res) {
        console.log(`${word} : ${freq}`);
    }
}
// An in-memory string stands in for a file so the example runs
// without external data.
const text = 'Welcome to the world of Geeks Geeks for Geeks is great';
processText(text, 5);

// To analyse a file instead, load it with fs.readFile('file.txt', 'utf8', ...)
// from require('fs'), report "File doesn't exist" on error, and otherwise
// pass the file contents to processText.
Time Complexity : O(n + n Log k) where n is the number of words in the file. We assume that every word is of constant length.
Using Trie and Min Heap
The approach leverages a Trie to efficiently store and search words as they are read from the file, while simultaneously keeping track of each word's occurrence count. Each Trie node is enhanced with an additional field, indexMinHeap (named `ind` in the code below), which indicates the position of the word in the Min Heap if it is currently among the top k frequent words (or -1 if it is not). In parallel, a Min Heap of fixed size k is maintained to record the k most frequent words encountered so far. Each node in the Min Heap contains the word, its frequency, and a pointer to the corresponding Trie leaf node. As words are processed, the algorithm updates their frequencies in the Trie and then reflects these changes in the Min Heap by either updating an existing entry, inserting a new entry if space is available, or replacing the root of the Min Heap (which represents the least frequent word among the top k) when the new word’s frequency exceeds it.
Step-by-Step Process to Execute the Code
- Open the input file and ensure it is accessible; report an error if the file cannot be opened.
- Read words from the file one by one. For each word, insert it into the Trie: if the word already exists, increment its frequency counter; if not, create a new node and initialize its count to 1.
- For every word inserted or updated in the Trie, update the Min Heap as follows:
  - If the word is already present in the Min Heap (i.e., its `indexMinHeap` is not -1), simply update its frequency in the heap and call `minHeapify()` at the respective index.
  - If the word is not present and the Min Heap has available space, insert the new word into the heap, update its corresponding Trie node's `indexMinHeap`, and rebuild the heap.
  - If the Min Heap is full, compare the frequency of the new word with the frequency at the root of the heap (the smallest frequency among the top k). If the new word’s frequency is not higher, do nothing; if it is higher, replace the root with the new word, update the Trie node of the word being replaced (setting its `indexMinHeap` to -1), and call `minHeapify()` to restore the heap property.
- After processing all words, the Min Heap will contain the k most frequent words. Finally, iterate over the Min Heap and print each word along with its frequency.
Below is given the implementation:
#include <bits/stdc++.h>
using namespace std;
// One trie node. Each node owns its children: destroying a node destroys the
// whole subtree below it, so deleting the trie root frees the entire trie.
class Node {
public:
    bool isEnd;                // true once a word ends at this node
    unsigned freq;             // number of occurrences of the word ending here
    int ind;                   // position in the min-heap array, -1 if absent
    std::vector<Node*> child;  // 26 slots, one per letter 'a'..'z'; owned

    Node() : isEnd(false), freq(0),
             ind(-1), child(26, nullptr) {}

    // The children are owning raw pointers, so a copy would lead to a
    // double delete; copying is therefore disabled.
    Node(const Node&) = delete;
    Node& operator=(const Node&) = delete;

    // Recursively releases the subtree (deleting nullptr is a no-op).
    // Without this, deleting the root leaked every other node.
    ~Node() {
        for (Node* next : child)
            delete next;
    }
};
// One slot of the min-heap: a word, its frequency, and a pointer to the
// trie node where that word ends.
class minHeapNode {
public:
    Node* root = nullptr;  // trie node of the word
    unsigned freq = 0;     // frequency recorded in the heap
    string word;           // the word itself; empty by default

    minHeapNode() {}
};
class MinHeap {
public:
int cap;
int count;
vector<minHeapNode> arr;
MinHeap(int cap) :
cap(cap), count(0), arr(cap) {}
void swapNodes(int a, int b) {
swap(arr[a], arr[b]);
arr[a].root->ind = a;
arr[b].root->ind = b;
}
void heapify(int idx) {
int left = 2 * idx + 1;
int right = 2 * idx + 2;
int mini = idx;
if (left < count &&
arr[left].freq < arr[mini].freq)
mini = left;
if (right < count &&
arr[right].freq < arr[mini].freq)
mini = right;
if (mini != idx) {
swapNodes(idx, mini);
heapify(mini);
}
}
void build() {
for (int i = (count - 1) / 2; i >= 0; --i)
heapify(i);
}
};
// Reflects a just-updated trie node in the min-heap.
//
// mH:   heap holding the most frequent words seen so far.
// root: trie node at which `word` ends; root->freq is already up to date.
// word: text stored in the heap entry when a new entry is created.
void insert(MinHeap& mH, Node* root,
            const string& word) {
    // A heap without capacity can never hold a word. Without this guard
    // the "heap is full" branch below would read arr[0] of an empty array.
    if (mH.cap <= 0)
        return;

    // Case 1: the word is already in the heap, so bump its frequency and
    // sift it down (its key only ever grows).
    if (root->ind != -1) {
        ++mH.arr[root->ind].freq;
        mH.heapify(root->ind);
        return;
    }

    const bool hasRoom = mH.count < mH.cap;
    // Heap is full and the word does not beat the current minimum.
    if (!hasRoom && root->freq <= mH.arr[0].freq)
        return;

    minHeapNode node;
    node.root = root;
    node.freq = root->freq;
    node.word = word;

    if (hasRoom) {
        // Case 2: append to the first free slot and rebuild the heap.
        mH.arr[mH.count] = node;
        root->ind = mH.count++;
        mH.build();
    } else {
        // Case 3: replace the minimum; the evicted word's trie node is
        // marked as no longer being in the heap.
        mH.arr[0].root->ind = -1;
        mH.arr[0] = node;
        root->ind = 0;
        mH.heapify(0);
    }
}
// Adds one occurrence of `word` to the trie rooted at `root` (creating the
// root if it is null) and then updates the heap through insert().
//
// Letters are matched case-insensitively. Characters other than a-z / A-Z
// are skipped, so "Geeks." is counted as "Geeks" instead of being dropped;
// a token that contains no letter at all is ignored. The text handed to the
// heap is the token with its non-letter characters removed.
//
// index: position in `word` at which the descent from `root` starts.
void insertUtil(Node*& root, MinHeap& mH,
                const string& word, size_t index = 0) {
    if (!root)
        root = new Node();

    // Maps a character to its child slot (0..25) or -1 for a non-letter.
    // The unsigned char cast keeps tolower() defined for negative chars.
    const auto slotOf = [](char ch) -> int {
        const int lowered = tolower(static_cast<unsigned char>(ch));
        return (lowered >= 'a' && lowered <= 'z') ? lowered - 'a' : -1;
    };

    std::string letters;
    for (char ch : word)
        if (slotOf(ch) >= 0)
            letters.push_back(ch);
    if (letters.empty())
        return;  // nothing to count; no trie nodes are created

    // Walk down, creating nodes as needed.
    Node* node = root;
    for (size_t i = index; i < word.size(); ++i) {
        const int slot = slotOf(word[i]);
        if (slot < 0)
            continue;
        if (!node->child[slot])
            node->child[slot] = new Node();
        node = node->child[slot];
    }

    if (node->isEnd) {
        ++node->freq;
    } else {
        node->isEnd = true;
        node->freq = 1;
    }
    insert(mH, node, letters);
}
// Entry point for adding one word: forwards to insertUtil, starting at the
// first character of the word.
void insertTrieAndHeap(const string& word,
                       Node*& root, MinHeap& mH) {
    const size_t firstChar = 0;
    insertUtil(root, mH, word, firstChar);
}
void displayMinHeap(const MinHeap& mH) {
for (int i = 0; i < mH.count; ++i)
cout << mH.arr[i].word << " : "
<< mH.arr[i].freq << endl;
}
// Reads whitespace-separated words from `file` and prints the heap of the
// k most frequent ones via displayMinHeap.
//
// file: open input stream; it is read until extraction fails.
// k:    heap capacity; for k <= 0 nothing is read or printed.
void printKMostFreq(ifstream& file, int k) {
    // A non-positive k cannot select any word, and it is not a valid heap
    // capacity (a negative value would be used as the vector size).
    if (k <= 0)
        return;

    MinHeap mH(k);
    Node* root = nullptr;

    for (string word; file >> word;)
        insertTrieAndHeap(word, root, mH);

    displayMinHeap(mH);

    // Release the trie root; deleting a null pointer is a no-op.
    delete root;
}
// Splits `str` on whitespace and prints the heap of the k most frequent
// words via displayMinHeap.
//
// str: text to analyse.
// k:   heap capacity; for k <= 0 nothing is printed.
void printKMostFreq(string str, int k) {
    // A non-positive k cannot select any word, and it is not a valid heap
    // capacity (a negative value would be used as the vector size).
    if (k <= 0)
        return;

    MinHeap mH(k);
    Node* root = nullptr;

    istringstream iss(str);
    for (string word; iss >> word;)
        insertTrieAndHeap(word, root, mH);

    displayMinHeap(mH);

    // Release the trie root; deleting a null pointer is a no-op.
    delete root;
}
int main() {
    // An in-memory string stands in for a file so the example can be
    // compiled and run without any external data.
    const int topCount = 5;
    const string sample = "Welcome to the world of Geeks . This portal has been created to provide well written well thought and well explained solutions for selected questions If you like Geeks for Geeks and would like to contribute here is your chance You can write article and mail your article to contribute at geeksforgeeks org See your article appearing on the Geeks for Geeks main page and help thousands of other Geeks";
    printKMostFreq(sample, topCount);

    // To analyse a file instead, open it with ifstream file("file.txt"),
    // print "File doesn't exist" to cerr and return 1 if that fails, and
    // otherwise call printKMostFreq(file, topCount).
    return 0;
}
import java.io.*;
import java.util.*;
import java.util.regex.*;
/** One trie node; {@code child} has one slot per letter 'a'..'z'. */
class Node {
    // true once a word ends at this node
    boolean isEnd = false;
    // number of occurrences of the word ending here
    int freq = 0;
    // position in the min-heap array, -1 when the word is not in the heap
    int ind = -1;
    Node[] child = new Node[26];

    Node() {
    }
}
/** One heap slot: a word, its frequency and the trie node where it ends. */
class MinHeapNode {
    Node root = null;
    int freq = 0;
    String word = "";

    MinHeapNode() {
    }
}
/**
 * Array-based min-heap of MinHeapNode ordered by freq. All {@code cap}
 * slots are allocated up front; the first {@code count} are in use.
 */
class MinHeap {
    int cap;
    int count;
    MinHeapNode[] arr;

    MinHeap(int cap) {
        this.cap = cap;
        this.count = 0;
        this.arr = new MinHeapNode[cap];
        int slot = 0;
        while (slot < cap) {
            arr[slot++] = new MinHeapNode();
        }
    }

    /** Exchanges two slots and records the new positions in their trie nodes. */
    void swapNodes(int a, int b) {
        MinHeapNode held = arr[a];
        arr[a] = arr[b];
        arr[b] = held;
        arr[a].root.ind = a;
        arr[b].root.ind = b;
    }

    /** Sifts the entry at idx down until neither child has a smaller freq. */
    void heapify(int idx) {
        while (true) {
            int left = 2 * idx + 1;
            int right = 2 * idx + 2;
            int smallest = idx;
            if (left < count && arr[left].freq < arr[smallest].freq) {
                smallest = left;
            }
            if (right < count && arr[right].freq < arr[smallest].freq) {
                smallest = right;
            }
            if (smallest == idx) {
                return;
            }
            swapNodes(idx, smallest);
            idx = smallest;
        }
    }

    /** Re-establishes the heap property over the slots in use. */
    void build() {
        int i = (count - 1) / 2;
        while (i >= 0) {
            heapify(i);
            i--;
        }
    }
}
/**
 * Finds the k most frequent words of a text with a trie (word counts) and a
 * fixed-size min-heap (current top k).
 */
class GfG {
    /** Maps a character to its trie slot (0..25), or -1 for a non-letter. */
    private static int slot(char ch) {
        char lower = Character.toLowerCase(ch);
        return (lower >= 'a' && lower <= 'z') ? lower - 'a' : -1;
    }

    /**
     * Reflects a just-updated trie node in the heap.
     *
     * @param mH   heap of the most frequent words so far
     * @param root trie node at which the word ends; its freq is up to date
     * @param word text stored in a newly created heap entry
     */
    static void insert(MinHeap mH, Node root, String word) {
        // A heap without capacity can never hold a word; without this guard
        // the "heap is full" branch would read arr[0] of an empty array.
        if (mH.cap <= 0) {
            return;
        }
        // Case 1: already in the heap - bump the frequency and sift down.
        if (root.ind != -1) {
            ++mH.arr[root.ind].freq;
            mH.heapify(root.ind);
            return;
        }
        boolean hasRoom = mH.count < mH.cap;
        // Heap is full and the word does not beat the current minimum.
        if (!hasRoom && root.freq <= mH.arr[0].freq) {
            return;
        }
        MinHeapNode node = new MinHeapNode();
        node.root = root;
        node.freq = root.freq;
        node.word = word;
        if (hasRoom) {
            // Case 2: append to the first free slot and rebuild.
            mH.arr[mH.count] = node;
            root.ind = mH.count++;
            mH.build();
        } else {
            // Case 3: replace the minimum and mark the evicted word's trie
            // node as no longer being in the heap.
            mH.arr[0].root.ind = -1;
            mH.arr[0] = node;
            root.ind = 0;
            mH.heapify(0);
        }
    }

    /**
     * Adds one occurrence of {@code word} to the trie below {@code root} and
     * updates the heap. Characters other than a-z / A-Z are skipped (so a
     * token such as "abc1" counts as "abc" instead of being dropped and
     * leaving unused trie nodes); a token without any letter is ignored.
     *
     * @param index position in word at which the descent from root starts
     */
    static void insertUtil(Node root, MinHeap mH, String word, int index) {
        StringBuilder letters = new StringBuilder();
        for (int i = 0; i < word.length(); i++) {
            if (slot(word.charAt(i)) >= 0) {
                letters.append(word.charAt(i));
            }
        }
        if (letters.length() == 0) {
            return;
        }

        Node node = root;
        for (int i = index; i < word.length(); i++) {
            int pos = slot(word.charAt(i));
            if (pos < 0) {
                continue;
            }
            if (node.child[pos] == null) {
                node.child[pos] = new Node();
            }
            node = node.child[pos];
        }

        if (node.isEnd) {
            ++node.freq;
        } else {
            node.isEnd = true;
            node.freq = 1;
        }
        insert(mH, node, letters.toString());
    }

    /** Adds one word, starting the trie descent at its first character. */
    static void insertTrieAndHeap(String word, Node root, MinHeap mH) {
        insertUtil(root, mH, word, 0);
    }

    /** Prints every used heap slot as "word : freq", in array order. */
    static void displayMinHeap(MinHeap mH) {
        for (int i = 0; i < mH.count; ++i)
            System.out.println(mH.arr[i].word + " : " + mH.arr[i].freq);
    }

    /**
     * Reads {@code file} line by line and prints the heap of the k most
     * frequent words. For k <= 0 nothing is read or printed.
     */
    static void printKMostFreq(BufferedReader file, int k) throws IOException {
        // A non-positive k is not a valid heap capacity.
        if (k <= 0) {
            return;
        }
        MinHeap mH = new MinHeap(k);
        Node root = new Node();
        String line;
        while ((line = file.readLine()) != null) {
            for (String word : line.split("\\W+")) {
                if (!word.isEmpty()) {
                    // Locale.ROOT keeps lower-casing independent of the
                    // default locale (e.g. Turkish dotless i).
                    insertTrieAndHeap(word.toLowerCase(Locale.ROOT), root, mH);
                }
            }
        }
        displayMinHeap(mH);
    }

    /**
     * Splits {@code str} on non-word characters and prints the heap of the
     * k most frequent words. For k <= 0 nothing is printed.
     */
    static void printKMostFreq(String str, int k) {
        // A non-positive k is not a valid heap capacity.
        if (k <= 0) {
            return;
        }
        MinHeap mH = new MinHeap(k);
        Node root = new Node();
        for (String word : str.split("\\W+")) {
            if (!word.isEmpty()) {
                insertTrieAndHeap(word.toLowerCase(Locale.ROOT), root, mH);
            }
        }
        displayMinHeap(mH);
    }

    public static void main(String[] args) throws IOException {
        int k = 5;
        // To read from a file instead:
        // BufferedReader file = new BufferedReader(new FileReader("file.txt"));
        // printKMostFreq(file, k);

        // An in-memory string is used so the example runs without a file.
        String str = "Welcome to the world of Geeks . This portal has been created to provide well written well thought and well explained solutions for selected questions If you like Geeks for Geeks and would like to contribute here is your chance You can write article and mail your article to contribute at geeksforgeeks org See your article appearing on the Geeks for Geeks main page and help thousands of other Geeks";
        printKMostFreq(str, k);
    }
}
import heapq
import string
class Node:
    """One trie node with 26 child slots, one per letter a-z."""

    def __init__(self):
        # isEnd: a word ends here; freq: its count; ind: heap position or -1
        self.isEnd, self.freq, self.ind = False, 0, -1
        self.child = [None for _ in range(26)]
class MinHeapNode:
    """One heap slot: a word, its frequency and the trie node where it ends."""

    def __init__(self):
        self.root, self.freq, self.word = None, 0, ""
class MinHeap:
    """Array-based min-heap of MinHeapNode ordered by ``freq``.

    ``arr`` is allocated with ``cap`` slots up front; the first ``count``
    slots are in use.
    """

    def __init__(self, cap):
        self.cap = cap
        self.count = 0
        self.arr = [MinHeapNode() for _ in range(cap)]

    def swapNodes(self, a, b):
        """Exchange two slots and record the new positions in their trie nodes."""
        first, second = self.arr[a], self.arr[b]
        self.arr[a] = second
        self.arr[b] = first
        self.arr[a].root.ind = a
        self.arr[b].root.ind = b

    def heapify(self, idx):
        """Sift the entry at ``idx`` down until no child has a smaller freq."""
        while True:
            left = 2 * idx + 1
            right = left + 1
            smallest = idx
            if left < self.count and self.arr[left].freq < self.arr[smallest].freq:
                smallest = left
            if right < self.count and self.arr[right].freq < self.arr[smallest].freq:
                smallest = right
            if smallest == idx:
                return
            self.swapNodes(idx, smallest)
            idx = smallest

    def build(self):
        """Re-establish the heap property over the slots in use."""
        i = (self.count - 1) // 2
        while i >= 0:
            self.heapify(i)
            i -= 1
def insert(mH, root, word):
    """Reflect a just-updated trie node in the min-heap.

    mH   -- heap of the most frequent words seen so far
    root -- trie node at which ``word`` ends; ``root.freq`` is up to date
    word -- text stored in a newly created heap entry
    """
    # A heap without capacity can never hold a word; without this guard
    # the "heap is full" branch would index into an empty list.
    if mH.cap <= 0:
        return

    # Case 1: the word is already in the heap, so bump its frequency
    # and sift it down.
    if root.ind != -1:
        mH.arr[root.ind].freq += 1
        mH.heapify(root.ind)
        return

    has_room = mH.count < mH.cap
    # Heap is full and the word does not beat the current minimum.
    if not has_room and root.freq <= mH.arr[0].freq:
        return

    node = MinHeapNode()
    node.root = root
    node.freq = root.freq
    node.word = word

    if has_room:
        # Case 2: append to the first free slot and rebuild the heap.
        mH.arr[mH.count] = node
        root.ind = mH.count
        mH.count += 1
        mH.build()
    else:
        # Case 3: replace the minimum and mark the evicted word's trie
        # node as no longer being in the heap.
        mH.arr[0].root.ind = -1
        mH.arr[0] = node
        root.ind = 0
        mH.heapify(0)
def insertUtil(root, mH, word, index=0):
    """Add one occurrence of ``word`` to the trie and update the heap.

    Letters are matched case-insensitively. Characters other than
    a-z / A-Z are skipped, so "Geeks." is counted as "Geeks" instead of
    being dropped; a token without any letter (including the empty
    string) is ignored. The text handed to the heap is the token with
    its non-letter characters removed.

    index -- position in ``word`` at which the descent from ``root`` starts
    """
    def slot(ch):
        # Child slot 0..25, or -1 for a non-letter. lower() may expand a
        # character to several code points, which ord() would reject.
        low = ch.lower()
        if len(low) == 1 and 'a' <= low <= 'z':
            return ord(low) - ord('a')
        return -1

    letters = ''.join(ch for ch in word if slot(ch) >= 0)
    if not letters:
        return

    node = root if root is not None else Node()
    for i in range(index, len(word)):
        pos = slot(word[i])
        if pos < 0:
            continue
        if node.child[pos] is None:
            node.child[pos] = Node()
        node = node.child[pos]

    if node.isEnd:
        node.freq += 1
    else:
        node.isEnd = True
        node.freq = 1
    insert(mH, node, letters)
def insertTrieAndHeap(word, root, mH):
    """Add one word, starting the trie descent at its first character."""
    insertUtil(root, mH, word, 0)
def displayMinHeap(mH):
    """Print every used heap slot as ``word : freq``, in array order."""
    slot = 0
    while slot < mH.count:
        entry = mH.arr[slot]
        print(entry.word, ":", entry.freq)
        slot += 1
def printKMostFreq(file, k):
    """Print the heap of the k most frequent words read from ``file``.

    file -- object with a ``read()`` method returning the text
    k    -- heap capacity; for k <= 0 nothing is read or printed
    """
    # A non-positive k cannot select any word and is not a usable heap
    # capacity.
    if k <= 0:
        return
    mH = MinHeap(k)
    root = Node()
    # Process the whitespace-separated words of the file.
    for word in file.read().split():
        insertTrieAndHeap(word, root, mH)
    displayMinHeap(mH)
def printKMostFreqString(str, k):
    """Print the heap of the k most frequent words of the text ``str``.

    str -- text to analyse; words are separated by whitespace
    k   -- heap capacity; for k <= 0 nothing is printed
    """
    # A non-positive k cannot select any word and is not a usable heap
    # capacity.
    if k <= 0:
        return
    mH = MinHeap(k)
    root = Node()
    for word in str.split():
        insertTrieAndHeap(word, root, mH)
    displayMinHeap(mH)
if __name__ == "__main__":
    top_count = 5
    # To read from a file instead, open it with open("file.txt", "r")
    # and pass the file object to printKMostFreq.
    #
    # An in-memory string is used here so the example runs without a file.
    sample_text = "Welcome to the world of Geeks . This portal has been created to provide well written well thought and well explained solutions for selected questions If you like Geeks for Geeks and would like to contribute here is your chance You can write article and mail your article to contribute at geeksforgeeks org See your article appearing on the Geeks for Geeks main page and help thousands of other Geeks"
    printKMostFreqString(sample_text, top_count)
using System;
using System.IO;
using System.Collections.Generic;
using System.Text.RegularExpressions;
// One trie node; children has one slot per letter 'a'..'z'.
class Node {
    // true once a word ends at this node
    public bool isEnd = false;
    // number of occurrences of the word ending here
    public int freq = 0;
    // position in the min-heap array, -1 when the word is not in the heap
    public int ind = -1;
    public Node[] children = new Node[26];

    public Node() {
    }
}
// One heap slot: a word, its frequency and the trie node where it ends.
class MinHeapNode {
    public Node root = null;
    public int freq = 0;
    public string word = "";

    public MinHeapNode() {
    }
}
// Array-based min-heap of MinHeapNode ordered by freq. All cap slots are
// allocated up front; the first count slots are in use.
class MinHeap {
    public int cap;
    public int count;
    public MinHeapNode[] arr;

    public MinHeap(int cap) {
        this.cap = cap;
        this.count = 0;
        this.arr = new MinHeapNode[cap];
        int slot = 0;
        while (slot < cap) {
            arr[slot] = new MinHeapNode();
            slot++;
        }
    }

    // Exchanges two slots and records the new positions in their trie nodes.
    public void SwapNodes(int a, int b) {
        MinHeapNode held = arr[a];
        arr[a] = arr[b];
        arr[b] = held;
        arr[a].root.ind = a;
        arr[b].root.ind = b;
    }

    // Sifts the entry at idx down until neither child has a smaller freq.
    public void Heapify(int idx) {
        while (true) {
            int left = 2 * idx + 1;
            int right = 2 * idx + 2;
            int smallest = idx;
            if (left < count && arr[left].freq < arr[smallest].freq) {
                smallest = left;
            }
            if (right < count && arr[right].freq < arr[smallest].freq) {
                smallest = right;
            }
            if (smallest == idx) {
                return;
            }
            SwapNodes(idx, smallest);
            idx = smallest;
        }
    }

    // Re-establishes the heap property over the slots in use.
    public void Build() {
        int i = (count - 1) / 2;
        while (i >= 0) {
            Heapify(i);
            i--;
        }
    }
}
// Finds the k most frequent words of a text with a trie (word counts) and a
// fixed-size min-heap (current top k).
class GfG {
    // Maps a character to its trie slot (0..25), or -1 for a non-letter.
    // The invariant culture keeps the mapping independent of the current
    // culture (e.g. Turkish dotless i).
    static int Slot(char ch) {
        char lower = Char.ToLowerInvariant(ch);
        return (lower >= 'a' && lower <= 'z') ? lower - 'a' : -1;
    }

    // Reflects a just-updated trie node in the heap.
    //   mH   - heap of the most frequent words so far
    //   root - trie node at which the word ends; its freq is up to date
    //   word - text stored in a newly created heap entry
    static void Insert(MinHeap mH, Node root, string word) {
        // A heap without capacity can never hold a word; without this guard
        // the "heap is full" branch would read arr[0] of an empty array.
        if (mH.cap <= 0) {
            return;
        }
        // Case 1: word is already in mH, so update its freq.
        if (root.ind != -1) {
            ++mH.arr[root.ind].freq;
            mH.Heapify(root.ind);
            return;
        }
        bool hasRoom = mH.count < mH.cap;
        // Heap is full and the word does not beat the current minimum.
        if (!hasRoom && root.freq <= mH.arr[0].freq) {
            return;
        }
        MinHeapNode node = new MinHeapNode();
        node.root = root;
        node.freq = root.freq;
        node.word = word;
        if (hasRoom) {
            // Case 2: append to the first free slot and rebuild.
            mH.arr[mH.count] = node;
            root.ind = mH.count++;
            mH.Build();
        } else {
            // Case 3: replace the minimum and mark the evicted word's trie
            // node as no longer being in the heap.
            mH.arr[0].root.ind = -1;
            mH.arr[0] = node;
            root.ind = 0;
            mH.Heapify(0);
        }
    }

    // Adds one occurrence of word to the trie below root and updates the
    // heap. Characters other than a-z / A-Z are skipped (so a token such as
    // "abc1" counts as "abc" instead of being dropped and leaving unused
    // trie nodes); a token without any letter is ignored.
    //   index - position in word at which the descent from root starts
    static void InsertUtil(Node root, MinHeap mH, string word, int index = 0) {
        var letters = new System.Text.StringBuilder();
        foreach (char ch in word) {
            if (Slot(ch) >= 0) {
                letters.Append(ch);
            }
        }
        if (letters.Length == 0) {
            return;
        }

        Node node = root;
        for (int i = index; i < word.Length; i++) {
            int pos = Slot(word[i]);
            if (pos < 0) {
                continue;
            }
            if (node.children[pos] == null) {
                node.children[pos] = new Node();
            }
            node = node.children[pos];
        }

        if (node.isEnd) {
            ++node.freq;
        } else {
            node.isEnd = true;
            node.freq = 1;
        }
        Insert(mH, node, letters.ToString());
    }

    // Adds one word, starting the trie descent at its first character.
    static void InsertTrieAndHeap(string word, Node root, MinHeap mH) {
        InsertUtil(root, mH, word);
    }

    // Prints every used heap slot as "word : freq", in array order.
    static void DisplayMinHeap(MinHeap mH) {
        for (int i = 0; i < mH.count; ++i)
            Console.WriteLine(mH.arr[i].word + " : " + mH.arr[i].freq);
    }

    // Reads file line by line and prints the heap of the k most frequent
    // words. For k <= 0 nothing is read or printed.
    static void PrintKMostFreq(StreamReader file, int k) {
        // A non-positive k is not a valid heap capacity.
        if (k <= 0) {
            return;
        }
        MinHeap mH = new MinHeap(k);
        Node root = new Node();
        string line;
        while ((line = file.ReadLine()) != null) {
            foreach (string word in Regex.Split(line, @"\W+")) {
                if (!string.IsNullOrEmpty(word)) {
                    InsertTrieAndHeap(word.ToLowerInvariant(), root, mH);
                }
            }
        }
        DisplayMinHeap(mH);
    }

    // Splits str on non-word characters and prints the heap of the k most
    // frequent words. For k <= 0 nothing is printed.
    static void PrintKMostFreq(string str, int k) {
        // A non-positive k is not a valid heap capacity.
        if (k <= 0) {
            return;
        }
        MinHeap mH = new MinHeap(k);
        Node root = new Node();
        foreach (string word in Regex.Split(str, @"\W+")) {
            if (!string.IsNullOrEmpty(word)) {
                InsertTrieAndHeap(word.ToLowerInvariant(), root, mH);
            }
        }
        DisplayMinHeap(mH);
    }

    public static void Main() {
        int k = 5;
        // To read from a file instead:
        // using (StreamReader file = new StreamReader("file.txt")) {
        //     PrintKMostFreq(file, k);
        // }

        // An in-memory string is used so the example runs without a file.
        string str = "Welcome to the world of Geeks . This portal has been created to provide well written well thought and well explained solutions for selected questions If you like Geeks for Geeks and would like to contribute here is your chance You can write article and mail your article to contribute at geeksforgeeks org See your article appearing on the Geeks for Geeks main page and help thousands of other Geeks";
        PrintKMostFreq(str, k);
    }
}
// One trie node with 26 child slots, one per letter a-z.
class Node {
    constructor() {
        // isEnd: a word ends here; freq: its count;
        // ind: position in the heap array, or -1 when not in the heap.
        Object.assign(this, { isEnd: false, freq: 0, ind: -1 });
        this.child = Array.from({ length: 26 }, () => null);
    }
}
// One heap slot: a word, its frequency and the trie node where it ends.
class MinHeapNode {
    constructor() {
        Object.assign(this, { root: null, freq: 0, word: "" });
    }
}
// Array-based min-heap of MinHeapNode ordered by freq. All cap slots are
// allocated up front; the first count slots are in use.
class MinHeap {
    constructor(cap) {
        this.cap = cap;
        this.count = 0;
        this.arr = Array.from(new Array(cap), () => new MinHeapNode());
    }

    // Exchanges two slots and records the new positions in their trie nodes.
    swapNodes(a, b) {
        const held = this.arr[a];
        this.arr[a] = this.arr[b];
        this.arr[b] = held;
        this.arr[a].root.ind = a;
        this.arr[b].root.ind = b;
    }

    // Sifts the entry at idx down until neither child has a smaller freq.
    heapify(idx) {
        for (;;) {
            const left = 2 * idx + 1;
            const right = 2 * idx + 2;
            let smallest = idx;
            if (left < this.count && this.arr[left].freq < this.arr[smallest].freq) {
                smallest = left;
            }
            if (right < this.count && this.arr[right].freq < this.arr[smallest].freq) {
                smallest = right;
            }
            if (smallest === idx) {
                return;
            }
            this.swapNodes(idx, smallest);
            idx = smallest;
        }
    }

    // Re-establishes the heap property over the slots in use.
    build() {
        let i = Math.floor((this.count - 1) / 2);
        while (i >= 0) {
            this.heapify(i);
            i -= 1;
        }
    }
}
/**
 * Reflects a just-updated trie node in the min-heap.
 *
 * @param mH   heap of the most frequent words seen so far
 * @param root trie node at which `word` ends; root.freq is up to date
 * @param word text stored in a newly created heap entry
 */
function insert(mH, root, word) {
    // A heap without capacity can never hold a word; without this guard
    // the "heap is full" branch would dereference the missing arr[0].
    if (!(mH.cap > 0)) {
        return;
    }

    // Case 1: the word is already in the heap, so bump its frequency
    // and sift it down.
    if (root.ind !== -1) {
        mH.arr[root.ind].freq++;
        mH.heapify(root.ind);
        return;
    }

    const hasRoom = mH.count < mH.cap;
    // Heap is full and the word does not beat the current minimum.
    if (!hasRoom && !(root.freq > mH.arr[0].freq)) {
        return;
    }

    const node = new MinHeapNode();
    node.root = root;
    node.freq = root.freq;
    node.word = word;

    if (hasRoom) {
        // Case 2: append to the first free slot and rebuild the heap.
        mH.arr[mH.count] = node;
        root.ind = mH.count++;
        mH.build();
    } else {
        // Case 3: replace the minimum and mark the evicted word's trie
        // node as no longer being in the heap.
        mH.arr[0].root.ind = -1;
        mH.arr[0] = node;
        root.ind = 0;
        mH.heapify(0);
    }
}
/**
 * Adds one occurrence of `word` to the trie and updates the heap.
 *
 * Letters are matched case-insensitively. Characters other than a-z / A-Z
 * are skipped, so "Geeks." is counted as "Geeks" instead of being dropped;
 * a token without any letter (including the empty string) is ignored. The
 * text handed to the heap is the token with its non-letter characters
 * removed.
 *
 * @param index position in `word` at which the descent from `root` starts
 */
function insertUtil(root, mH, word, index = 0) {
    // Child slot 0..25, or -1 for a non-letter.
    const slotOf = (ch) => {
        const pos = ch.toLowerCase().charCodeAt(0) - 'a'.charCodeAt(0);
        return pos >= 0 && pos < 26 ? pos : -1;
    };

    let letters = '';
    for (let i = 0; i < word.length; ++i) {
        if (slotOf(word[i]) >= 0) {
            letters += word[i];
        }
    }
    if (letters === '') {
        return;
    }

    let node = root ? root : new Node();
    for (let i = index; i < word.length; ++i) {
        const pos = slotOf(word[i]);
        if (pos < 0) {
            continue;
        }
        if (!node.child[pos]) {
            node.child[pos] = new Node();
        }
        node = node.child[pos];
    }

    if (node.isEnd) {
        node.freq++;
    } else {
        node.isEnd = true;
        node.freq = 1;
    }
    insert(mH, node, letters);
}
// Adds one word, starting the trie descent at its first character.
function insertTrieAndHeap(word, root, mH) {
    const firstChar = 0;
    insertUtil(root, mH, word, firstChar);
}
// Prints every used heap slot as "word : freq", in array order.
function displayMinHeap(mH) {
    let slot = 0;
    while (slot < mH.count) {
        const entry = mH.arr[slot];
        console.log(entry.word + " : " + entry.freq);
        slot += 1;
    }
}
/**
 * Prints the heap of the k most frequent words of `str`.
 *
 * @param {string} str text to analyse; words are separated by whitespace
 * @param {number} k   heap capacity; for k <= 0 nothing is printed
 */
function printKMostFreq(str, k) {
    // A non-positive k cannot select any word and is not a usable heap
    // capacity (a negative array length throws a RangeError).
    if (!(k > 0)) {
        return;
    }
    const mH = new MinHeap(k);
    const root = new Node();
    for (const word of str.split(/\s+/)) {
        // Leading or trailing whitespace produces an empty token.
        if (word === '') {
            continue;
        }
        insertTrieAndHeap(word, root, mH);
    }
    displayMinHeap(mH);
}
// Demo driver: an in-memory string is used so the example runs
// without a file.
function main() {
    const topCount = 5;
    const sample = "Welcome to the world of Geeks . This portal has been created to provide well written well thought and well explained solutions for selected questions If you like Geeks for Geeks and would like to contribute here is your chance You can write article and mail your article to contribute at geeksforgeeks org See your article appearing on the Geeks for Geeks main page and help thousands of other Geeks";
    printKMostFreq(sample, topCount);
}

main();
Output
your : 3 well : 3 and : 4 to : 4 Geeks : 6
The above output is for a file with the following content.
Welcome to the world of Geeks . This portal has been created to provide well written well thought and well explained solutions for selected questions If you like Geeks for Geeks and would like to contribute here is your chance You can write article and mail your article to contribute at geeksforgeeks org See your article appearing on the Geeks for Geeks main page and help thousands of other Geeks.