N-Grams

n-grams can be obtained for either arrays or strings (which will be tokenized for you):

var NGrams = natural.NGrams;

bigrams

console.log(NGrams.bigrams('some words here'));
console.log(NGrams.bigrams(['some',  'words',  'here']));

Both of the above output: [ [ 'some', 'words' ], [ 'words', 'here' ] ]

trigrams

console.log(NGrams.trigrams('some other words here'));
console.log(NGrams.trigrams(['some',  'other', 'words',  'here']));

Both of the above output: [ [ 'some', 'other', 'words' ], [ 'other', 'words', 'here' ] ]

arbitrary n-grams

console.log(NGrams.ngrams('some other words here for you', 4));
console.log(NGrams.ngrams(['some', 'other', 'words', 'here', 'for',
    'you'], 4));

Both of the above output: [ [ 'some', 'other', 'words', 'here' ], [ 'other', 'words', 'here', 'for' ], [ 'words', 'here', 'for', 'you' ] ]

padding

n-grams can also be returned with left and/or right padding by passing a start and/or end symbol to the bigrams, trigrams, or ngrams functions.

console.log(NGrams.ngrams('some other words here for you', 4, '[start]', '[end]'));

The above will output:

[ [ '[start]', '[start]', '[start]', 'some' ],
  [ '[start]', '[start]', 'some', 'other' ],
  [ '[start]', 'some', 'other', 'words' ],
  [ 'some', 'other', 'words', 'here' ],
  [ 'other', 'words', 'here', 'for' ],
  [ 'words', 'here', 'for', 'you' ],
  [ 'here', 'for', 'you', '[end]' ],
  [ 'for', 'you', '[end]', '[end]' ],
  [ 'you', '[end]', '[end]', '[end]' ] ]

To pad only the end, pass null as the start symbol. For instance:

console.log(NGrams.ngrams('some other words here for you', 4, null, '[end]'));

Will output:

[ [ 'some', 'other', 'words', 'here' ],
  [ 'other', 'words', 'here', 'for' ],
  [ 'words', 'here', 'for', 'you' ],
  [ 'here', 'for', 'you', '[end]' ],
  [ 'for', 'you', '[end]', '[end]' ],
  [ 'you', '[end]', '[end]', '[end]' ] ]

NGramsZH

For Chinese-like languages, you can use NGramsZH to generate n-grams; its API is the same as that of NGrams:

var NGramsZH = natural.NGramsZH;
console.log(NGramsZH.bigrams('中文测试'));
console.log(NGramsZH.bigrams(['中',  '文',  '测', '试']));
console.log(NGramsZH.trigrams('中文测试'));
console.log(NGramsZH.trigrams(['中',  '文', '测',  '试']));
console.log(NGramsZH.ngrams('一个中文测试', 4));
console.log(NGramsZH.ngrams(['一', '个', '中', '文', '测',
    '试'], 4));