Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

String Algorithm updated #122

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,11 @@ Various Math algorithms:
#### String
Various String algorithms:


- [Dice's coefficient](https://github.com/manrajgrover/algorithms-js/blob/master/src/algorithms/string/dice_coeff.js)
- [KMP Pattern Search](https://github.com/manrajgrover/algorithms-js/blob/master/src/algorithms/string/kmp.js)
- [Levenshtein Distance](https://github.com/manrajgrover/algorithms-js/blob/master/src/algorithms/string/levenshtein_distance.js)
- [Rabin-Karp Pattern Search](https://github.com/manrajgrover/algorithms-js/blob/master/src/algorithms/string/rabin_karp.js)

#### Geometry
Various Geometry algorithms:
Expand Down
67 changes: 67 additions & 0 deletions src/algorithms/string/dice_coeff.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/**
 * Counts every contiguous substring of length n (n-gram) of a string.
 * @param {String} str Text to split into n-grams
 * @param {Number} n Length of each n-gram (expected to be a whole number)
 * @return {Map<String, Number>} n-gram -> number of occurrences, in order of first appearance
 * @throws {Error} If n is smaller than 1 or larger than the length of str
 */
function n_gram(str, n) {
  if (n < 1 || n > str.length) {
    throw new Error("Not possible n-gram");
  }

  // Declared locally: a bare assignment leaks a global in sloppy mode and is a
  // ReferenceError in strict mode / ES modules.
  const gramCounts = new Map();

  // Slide a window of width n over the string, one character at a time.
  for (let start = 0; start + n <= str.length; start += 1) {
    const gram = str.slice(start, start + n);
    gramCounts.set(gram, (gramCounts.get(gram) || 0) + 1);
  }

  return gramCounts;
}

/**
 * Picks the smaller of two values.
 * @param {Number} a First value
 * @param {Number} b Second value
 * @return {Number} b when a is greater than b, otherwise a
 */
function min(a, b) {
  if (a > b) {
    return b;
  }
  return a;
}

/**
 * Returns the computed Dice Coefficient for the given n-gram of the two strings:
 * twice the number of shared n-grams divided by the total number of n-grams in both strings.
 * @param {String} st1 First string
 * @param {String} st2 Second String
 * @param {Number} n n-mer
 * @return {Number} a number between 0 and 1, or -1 when either string is empty
 *                  or n is larger than the shorter string
 * @throws {Error} Raised by n_gram when n is smaller than 1
 */
function compute_dice(st1, st2, n) {
  if (st1.length < 1 || st2.length < 1 || n > min(st1.length, st2.length)) {
    return -1;
  }

  // Kept local: bare assignments here used to create implicit globals.
  const gramsOne = n_gram(st1, n);
  const gramsTwo = n_gram(st2, n);

  // A gram present in both strings is shared as many times as its rarer occurrence count.
  let sharedCount = 0;
  for (const [gram, countOne] of gramsOne) {
    if (gramsTwo.has(gram)) {
      sharedCount += min(countOne, gramsTwo.get(gram));
    }
  }

  let totalOne = 0;
  for (const count of gramsOne.values()) {
    totalOne += count;
  }

  let totalTwo = 0;
  for (const count of gramsTwo.values()) {
    totalTwo += count;
  }

  return (2 * sharedCount) / (totalOne + totalTwo);
}

module.exports = compute_dice;
8 changes: 7 additions & 1 deletion src/algorithms/string/index.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
const dice_coeff = require('./dice_coeff')
const kmp = require('./kmp')
const levenshteindistance = require('./levenshtein_distance');
const rabin_karp = require('./rabin_karp')

module.exports = {
levenshteindistance
dice_coeff,
kmp,
levenshteindistance,
rabin_karp
};
54 changes: 54 additions & 0 deletions src/algorithms/string/kmp.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/**
 * Builds the border array (KMP prefix function) of searchTerm + uniqueToken + searchSpace.
 * Entry i is the length of the longest proper prefix of the combined text that is also
 * a suffix of its first i + 1 characters.
 * @param {String} searchTerm Term to search for
 * @param {String} searchSpace Text to be searched in
 * @param {String} uniqueToken Separator placed between the term and the text
 * @return {Number[]} One border length per character of the combined text
 */
function kmp(searchTerm, searchSpace, uniqueToken = '\b') {
  const text = searchTerm + uniqueToken + searchSpace;
  const borderSize = [0];

  let border = 0;
  // Bounded by the real combined length so a separator longer than one character is fully scanned.
  for (let i = 1; i < text.length; i++) {
    // Fall back to shorter borders until one can be extended by text[i].
    while (border > 0 && text[i] !== text[border]) {
      border = borderSize[border - 1];
    }

    if (text[i] === text[border]) {
      border += 1;
    } else {
      border = 0;
    }
    borderSize.push(border);
  }

  return borderSize;
}


/**
 * Returns an Array of starting indexes where searchTerm was found. Returns [-1] if there is no match
 * or if either string is empty.
 * @param {String} searchTerm Term to search for
 * @param {String} searchSpace Text to be searched in
 * @param {String} uniqueToken Non-empty separator for the border computation; its first character
 *                             must not occur in searchSpace, otherwise matches can be missed
 * @return {Number[]} Array containing index location of matches
 */
function run_kmp(searchTerm, searchSpace, uniqueToken = '\b') {
  if (searchTerm.length < 1 || searchSpace.length < 1) {
    return [-1];
  }

  // Forward the caller's token; it used to be overwritten with '\b' at the call site.
  const borders = kmp(searchTerm, searchSpace, uniqueToken);
  const termLength = searchTerm.length;

  // A border of termLength at combined index i marks the END of a match. Translating to a start
  // index in searchSpace removes the prefix (term + token) and the rest of the match (termLength - 1).
  const offset = 2 * termLength + uniqueToken.length - 1;

  const startIndex = [];
  for (let i = offset; i < borders.length; i++) {
    // Starting at `offset` skips positions inside the term/token prefix, which would map to negative indexes.
    if (borders[i] === termLength) {
      startIndex.push(i - offset);
    }
  }

  return startIndex.length ? startIndex : [-1];
}

module.exports = run_kmp
80 changes: 80 additions & 0 deletions src/algorithms/string/rabin_karp.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/**
 * Raises base to a non-negative whole exponent by repeated squaring.
 * @param {Number} base Value to raise
 * @param {Number} exponent Non-negative whole exponent
 * @param {Number} [modulus] When given, every intermediate product is reduced modulo this value
 *                           so the result stays exactly representable
 * @return {Number} base^exponent, reduced modulo modulus when one is supplied
 */
function pow(base, exponent, modulus) {
  const reduce = (value) => (modulus === undefined ? value : value % modulus);

  let result = reduce(1);
  let factor = reduce(base);
  let remaining = exponent;
  while (remaining > 0) {
    if (remaining % 2 === 0) {
      // Even exponent: square the factor and halve the exponent (exact, no string round-trip).
      factor = reduce(factor * factor);
      remaining /= 2;
    } else {
      result = reduce(result * factor);
      remaining -= 1;
    }
  }
  return result;
}

/**
 * Checks character by character whether searchSpace[startPoint..endPoint] equals searchTerm.
 * @param {String} searchTerm Term expected in the window
 * @param {String} searchSpace Text containing the window
 * @param {Number} startPoint Index of the first character of the window
 * @param {Number} endPoint Index of the last character of the window (inclusive)
 * @return {Boolean} true only when the window has the term's length and identical characters
 */
function linearCompare(searchTerm, searchSpace, startPoint, endPoint) {
  if (endPoint - startPoint + 1 !== searchTerm.length) {
    return false;
  }

  for (let offset = 0; offset < searchTerm.length; offset++) {
    if (searchTerm[offset] !== searchSpace[startPoint + offset]) {
      return false;
    }
  }
  return true;
}

/**
 * Returns an Array of starting indexes where searchTerm was found. Returns [-1] if there is no match
 * or if either string is empty.
 * @param {String} searchTerm Term to search for
 * @param {String} searchSpace Text to be searched in
 * @param {Number} polynomial Polynomial value for polynomial hash fn
 * @param {Number} bucket Size of your bucket (more is better); a positive whole number, kept
 *                        below 2^26 so intermediate products remain exact
 * @return {Number[]} Array containing index location of matches
 */
function rabin_karp(searchTerm, searchSpace, polynomial = 7, bucket = 3000) {
  if (searchTerm.length < 1 || searchSpace.length < 1) {
    return [-1];
  }

  // Non-negative remainder; JavaScript's % keeps the sign of the dividend.
  const mod = (value) => ((value % bucket) + bucket) % bucket;

  const termLength = searchTerm.length;
  const multiplier = mod(polynomial);

  // Every step is reduced modulo bucket: unreduced hashes exceed 2^53 for longer terms,
  // lose precision, and then stop matching even where the term really occurs.
  let termHash = 0;
  for (let i = 0; i < termLength; i++) {
    termHash = mod(termHash * multiplier + searchTerm.charCodeAt(i));
  }

  // Weight the outgoing character carries once the window has been shifted by one position.
  const highOrder = pow(multiplier, termLength, bucket);

  const startIndex = [];
  let windowHash = 0;
  let windowStart = 0;

  for (let end = 0; end < searchSpace.length; end++) {
    windowHash = mod(windowHash * multiplier + searchSpace.charCodeAt(end));

    if (end >= termLength) {
      // Drop the character that just left the window.
      windowHash = mod(windowHash - highOrder * searchSpace.charCodeAt(windowStart));
      windowStart += 1;
    }

    // Equal hashes only suggest a match; confirm it to rule out collisions and partial windows.
    if (windowHash === termHash && linearCompare(searchTerm, searchSpace, windowStart, end)) {
      startIndex.push(windowStart);
    }
  }

  return startIndex.length < 1 ? [-1] : startIndex;
}

module.exports = rabin_karp
41 changes: 41 additions & 0 deletions test/algorithms/string/test_dice_coeff.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/* eslint-env mocha */
const diceCoeff = require('../../../src').algorithms.string.dice_coeff;
const assert = require('assert');

// Mocha suite for the Dice coefficient: identical strings, invalid input (-1 sentinel),
// disjoint strings, and a partially overlapping pair.
describe('Dice Coefficient', () => {
  const stringOne = "batman";
  const stringTwo = "cowgirl";
  const stringThree = "batmen";
  const n = 2;
  const over = 100;

  it('should return 1 for the same string', () => {
    const result = diceCoeff(stringOne, stringOne, n);
    assert.strictEqual(result, 1);
  });

  it('should return -1 if either of the strings is empty', () => {
    const resultOne = diceCoeff("", stringTwo, n);
    assert.strictEqual(resultOne, -1);
    const resultTwo = diceCoeff(stringOne, "", n);
    assert.strictEqual(resultTwo, -1);
  });

  it('should return -1 if the n value is more than the minimum length of either of the two strings', () => {
    const result = diceCoeff(stringOne, stringTwo, over);
    assert.strictEqual(result, -1);
  });

  it(`should return 0 for ${stringOne} and ${stringTwo} with n = ${n}`, () => {
    const result = diceCoeff(stringOne, stringTwo, n);
    assert.strictEqual(result, 0);
  });

  // "batman" and "batmen" share the bigrams ba, at and tm: 2 * 3 / (5 + 5) = 0.6.
  // (The previous pairing, "cowgirl" vs "batmen", has no bigram in common and yields 0.)
  it(`should return 0.6 for ${stringOne} and ${stringThree} with n = ${n}`, () => {
    const result = diceCoeff(stringOne, stringThree, n);
    assert.strictEqual(result, 0.6);
  });
});
37 changes: 37 additions & 0 deletions test/algorithms/string/test_kmp.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/* eslint-env mocha */
const kmp = require("../../../src").algorithms.string.kmp;
const assert = require('assert');


// Mocha suite for the KMP search: several matches, no match, one match, and empty inputs.
describe('Knuth Morris Pratt', () => {
  const searchTerm1 = "cancan";
  const searchTerm2 = "mixolydian";
  const searchTerm3 = "pina";
  const searchSpace1 = "cancancan like a tin cancan?";
  const searchSpace2 = "what even do you mean";
  const searchSpace3 = "pina colada and what ?";

  it(`should return index of \"${searchTerm1}\" in three positions for \"${searchSpace1}\"`, () => {
    const found = kmp(searchTerm1, searchSpace1);
    assert.deepStrictEqual(found, [0, 3, 21]);
  });

  it(`should return [-1] for \"${searchTerm2}\" in \"${searchSpace2}\"`, () => {
    const found = kmp(searchTerm2, searchSpace2);
    assert.deepStrictEqual(found, [-1]);
  });

  it(`should return one index for \"${searchTerm3}\" in \"${searchSpace3}\" `, () => {
    const found = kmp(searchTerm3, searchSpace3);
    assert.deepStrictEqual(found, [0]);
  });

  it('should return [-1] if either the searchTerm or searchSpace is empty', () => {
    // Empty term first, then empty text.
    const emptyTerm = kmp("", searchSpace1);
    assert.deepStrictEqual(emptyTerm, [-1]);

    const emptySpace = kmp(searchTerm1, "");
    assert.deepStrictEqual(emptySpace, [-1]);
  });
});
37 changes: 37 additions & 0 deletions test/algorithms/string/test_rabin_karp.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/* eslint-env mocha */
const rabin_karp = require("../../../src/").algorithms.string.rabin_karp;
const assert = require('assert');


// Mocha suite for the Rabin-Karp search: several matches, no match, one match, and empty inputs.
describe('Rabin Karp', () => {
  const searchTerm1 = "cancan";
  const searchTerm2 = "mixolydian";
  const searchTerm3 = "pina";
  const searchSpace1 = "cancancan like a tin cancan?";
  const searchSpace2 = "what even do you mean";
  const searchSpace3 = "pina colada and what ?";

  it(`should return index of \"${searchTerm1}\" in three positions for \"${searchSpace1}\"`, () => {
    const result = rabin_karp(searchTerm1, searchSpace1);
    assert.deepStrictEqual(result, [0, 3, 21]);
  });

  it(`should return [-1] for \"${searchTerm2}\" in \"${searchSpace2}\"`, () => {
    const result = rabin_karp(searchTerm2, searchSpace2);
    assert.deepStrictEqual(result, [-1]);
  });

  it(`should return one index for \"${searchTerm3}\" in \"${searchSpace3}\" `, () => {
    const result = rabin_karp(searchTerm3, searchSpace3);
    assert.deepStrictEqual(result, [0]);
  });

  it('should return [-1] if either the searchTerm or searchSpace is empty', () => {
    // Exercise rabin_karp itself: `kmp` is not defined in this file and threw a ReferenceError.
    const resultOne = rabin_karp("", searchSpace1);
    assert.deepStrictEqual(resultOne, [-1]);

    const resultTwo = rabin_karp(searchTerm1, "");
    assert.deepStrictEqual(resultTwo, [-1]);
  });
});