// src/algorithms/string/dice_coeff.js — Dice coefficient over character n-grams.

/**
 * Counts every contiguous substring of length `n` (n-gram) in `str`.
 * @param {String} str Text to split into n-grams
 * @param {Number} n Length of each n-gram
 * @return {Map<String, Number>} Map of n-gram -> number of occurrences in `str`
 * @throws {Error} If `n` is smaller than 1 or larger than `str.length`
 */
function n_gram(str, n) {
  if (n < 1 || n > str.length) {
    throw new Error("Not possible n-gram");
  }

  // Declared locally: the map must not leak into (or be shared through) the global scope.
  const gramCounts = new Map();
  for (let start = 0; start + n <= str.length; start += 1) {
    const gram = str.slice(start, start + n);
    gramCounts.set(gram, (gramCounts.get(gram) || 0) + 1);
  }

  return gramCounts;
}

/**
 * Returns the computed Dice Coefficient for the given n-gram of the two strings:
 * twice the number of shared n-grams divided by the total number of n-grams in
 * both strings. Repeated n-grams are counted with their multiplicity.
 * @param {String} st1 First string
 * @param {String} st2 Second String
 * @param {Number} n n-mer
 * @return {Number} a number between 0 and 1, or -1 when either string is empty
 *                  or `n` is larger than the shorter string
 * @throws {Error} If `n` is smaller than 1 (raised by the n-gram split)
 */
function compute_dice(st1, st2, n) {
  if (st1.length < 1 || st2.length < 1 || n > Math.min(st1.length, st2.length)) {
    return -1;
  }

  const firstGrams = n_gram(st1, n);
  const secondGrams = n_gram(st2, n);

  let sharedCount = 0;
  let firstTotal = 0;
  for (const [gram, count] of firstGrams) {
    firstTotal += count;
    if (secondGrams.has(gram)) {
      // A gram occurring a and b times in the two strings is shared min(a, b) times.
      sharedCount += Math.min(count, secondGrams.get(gram));
    }
  }

  let secondTotal = 0;
  for (const count of secondGrams.values()) {
    secondTotal += count;
  }

  return (2 * sharedCount) / (firstTotal + secondTotal);
}

// CommonJS export; the guard keeps the file loadable where `module` does not exist (ES modules).
if (typeof module !== 'undefined' && module.exports) {
  module.exports = compute_dice;
}
file mode 100644 index 00000000..2838cad3 --- /dev/null +++ b/src/algorithms/string/kmp.js @@ -0,0 +1,54 @@ +function kmp(searchTerm,searchSpace,uniqueToken = '\b'){ + //borderSize will be used to match up with searchTerm + $ + searchSpace + var borderSize = [] + var textArray = searchTerm + uniqueToken + searchSpace + borderSize.push(0); + + let border = 0; + for (i=1; i 0) && (textArray[i] != textArray[border])) { + border = borderSize[border - 1] + } + + if(textArray[i] == textArray[border]){ + border += 1 + borderSize.push(border) + } else { + border = 0 + borderSize.push(border) + } + } + + //returns the end positions + return borderSize +} + + +/** + * Returns an Array of starting indexes where searchTerm was found. Returns [-1] if no element + * @param {String} searchTerm Term to search for + * @param {String} searchSpace Text to be searched in + * @param {String} uniqueToken unique never occuring token for border cover + * @return {Number[]} Array containing index location of matches + */ +function run_kmp(searchTerm,searchSpace,uniqueToken = '\b'){ + if (searchTerm.length<1 || searchSpace.length<1){ + return [-1] + } else { + let startIndex = [] + correctPositions = kmp(searchTerm,searchSpace,uniqueToken = '\b') + for (i =0; i0){ + if (internal_exponent%2==0){ + internal_base *= internal_base + internal_exponent = parseInt(internal_exponent/2) + } else { + final_output *= internal_base + internal_exponent -= 1 + } + } + return final_output +} + +function linearCompare(searchTerm, searchSpace, startPoint, endPoint){ + if(endPoint-startPoint + 1 != searchTerm.length ){ + return false + } + + let j; + for (j= startPoint; j= searchTerm.length){ + super_hash -= (nth_polynomial_chain * searchSpace.charCodeAt(currentTermIndex)) + currentTermIndex +=1 + } + current_hash = super_hash % bucket + + //console.log(currentTermIndex + " to " +j + " :" + current_hash+ "\n") + if (current_hash == hash_val){ + if(linearCompare(searchTerm,searchSpace,currentTermIndex,j)){ + 
startIndex.push(currentTermIndex); + } + } + } + + if (startIndex.length<1){ + return [-1] + } else { + return startIndex + } +} + +module.exports = rabin_karp \ No newline at end of file diff --git a/test/algorithms/string/test_dice_coeff.js b/test/algorithms/string/test_dice_coeff.js new file mode 100644 index 00000000..02cbf9a3 --- /dev/null +++ b/test/algorithms/string/test_dice_coeff.js @@ -0,0 +1,40 @@ +const diceCoeff = require('../../../src').algorithms.string.dice_coeff; +const assert = require('assert'); + +describe('Dice Coefficient', () => { + + const stringOne = "batman", stringTwo = "cowgirl", stringThree = "batmen"; + const n = 2; const over = 100 + + it('should return 1 for the same string', () => { + + const result = diceCoeff(stringOne,stringOne,n); + assert.strictEqual(result,1); + }); + + it('should return -1 if either of the strings is empty', () => { + + const resultOne = diceCoeff("",stringTwo,n); + assert.strictEqual(resultOne,-1); + const resultTwo = diceCoeff(stringOne,"",n); + assert.strictEqual(resultTwo,-1); + }); + + it('should return -1 if the n value is more than the minimum length of either of the two strings', () => { + const result = diceCoeff(stringOne,stringTwo,over); + assert.strictEqual(result,-1); + }); + + + it(`should return 0 for ${stringOne} and ${stringTwo} with n = ${n}`, () => { + const result = diceCoeff(stringOne,stringTwo,n); + assert.strictEqual(result,0); + }); + + it(`should return 0. 
for ${stringTwo} and ${stringThree} with n = ${n}`, () => { + const result = diceCoeff(stringTwo,stringThree,n); + + assert.strictEqual(result,0.6) + + }); +}); \ No newline at end of file diff --git a/test/algorithms/string/test_kmp.js b/test/algorithms/string/test_kmp.js new file mode 100644 index 00000000..d1b165ca --- /dev/null +++ b/test/algorithms/string/test_kmp.js @@ -0,0 +1,36 @@ +const kmp = require("../../../src").algorithms.string.kmp; +const assert = require('assert'); + + +describe('Knuth Morris Pratt', () => { + + const searchTerm1 = "cancan", searchTerm2 = "mixolydian", searchTerm3 = "pina"; + const searchSpace1 = "cancancan like a tin cancan?", searchSpace2 = "what even do you mean", searchSpace3 = "pina colada and what ?"; + + it(`should return index of \"${searchTerm1}\" in three positions for \"${searchSpace1}\"`, () => { + + const result = kmp(searchTerm1,searchSpace1) + assert.deepStrictEqual(result, [0,3,21]); + }); + + it(`should return [-1] for \"${searchTerm2}\" in \"${searchSpace2}\"`, () => { + + const result = kmp(searchTerm2,searchSpace2) + assert.deepStrictEqual(result, [-1]); + }); + + it(`should return one index for \"${searchTerm3}\" in \"${searchSpace3}\" `, () => { + + const result = kmp(searchTerm3,searchSpace3) + assert.deepStrictEqual(result, [0]); + }); + + it('should return [-1] if either the searchTerm or searchSpace is empty', () => { + + const resultOne = kmp("",searchSpace1); + assert.deepStrictEqual(resultOne, [-1]); + + const resultTwo = kmp(searchTerm1,""); + assert.deepStrictEqual(resultTwo, [-1]); + }); +}); \ No newline at end of file diff --git a/test/algorithms/string/test_rabin_karp.js b/test/algorithms/string/test_rabin_karp.js new file mode 100644 index 00000000..b6b94234 --- /dev/null +++ b/test/algorithms/string/test_rabin_karp.js @@ -0,0 +1,36 @@ +const rabin_karp = require("../../../src/").algorithms.string.rabin_karp; +const assert = require('assert'); + + +describe('Rabin Karp', () => { + + const 
searchTerm1 = "cancan", searchTerm2 = "mixolydian", searchTerm3 = "pina"; + const searchSpace1 = "cancancan like a tin cancan?", searchSpace2 = "what even do you mean", searchSpace3 = "pina colada and what ?"; + + it(`should return index of \"${searchTerm1}\" in three positions for \"${searchSpace1}\"`, () => { + + const result = rabin_karp(searchTerm1,searchSpace1) + assert.deepStrictEqual(result, [0,3,21]); + }); + + it(`should return [-1] for \"${searchTerm2}\" in \"${searchSpace2}\"`, () => { + + const result = rabin_karp(searchTerm2,searchSpace2) + assert.deepStrictEqual(result, [-1]); + }); + + it(`should return one index for \"${searchTerm3}\" in \"${searchSpace3}\" `, () => { + + const result = rabin_karp(searchTerm3,searchSpace3) + assert.deepStrictEqual(result, [0]); + }); + + it('should return [-1] if either the searchTerm or searchSpace is empty', () => { + + const resultOne = kmp("",searchSpace1); + assert.deepStrictEqual(resultOne, [-1]); + + const resultTwo = kmp(searchTerm1,""); + assert.deepStrictEqual(resultTwo, [-1]); + }); +}); \ No newline at end of file