1 var toString = require('./toString');
/** Used to compose unicode character classes. */
var rsAstralRange = '\\ud800-\\udfff',
    rsComboMarksRange = '\\u0300-\\u036f\\ufe20-\\ufe23',
    rsComboSymbolsRange = '\\u20d0-\\u20f0',
    rsDingbatRange = '\\u2700-\\u27bf',
    rsLowerRange = 'a-z\\xdf-\\xf6\\xf8-\\xff',
    rsMathOpRange = '\\xac\\xb1\\xd7\\xf7',
    rsNonCharRange = '\\x00-\\x2f\\x3a-\\x40\\x5b-\\x60\\x7b-\\xbf',
    rsQuoteRange = '\\u2018\\u2019\\u201c\\u201d',
    rsSpaceRange = ' \\t\\x0b\\f\\xa0\\ufeff\\n\\r\\u2028\\u2029\\u1680\\u180e\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2007\\u2008\\u2009\\u200a\\u202f\\u205f\\u3000',
    rsUpperRange = 'A-Z\\xc0-\\xd6\\xd8-\\xde',
    rsVarRange = '\\ufe0e\\ufe0f',
    // Every character that separates words: math operators, ASCII/Latin-1
    // punctuation and controls, curly quotes, and whitespace.
    rsBreakRange = rsMathOpRange + rsNonCharRange + rsQuoteRange + rsSpaceRange;

/** Used to compose unicode capture groups. */
var rsBreak = '[' + rsBreakRange + ']',
    rsCombo = '[' + rsComboMarksRange + rsComboSymbolsRange + ']',
    // Also spliced into the negated class of `rsMisc` below, where `\d`
    // excludes digits and the `+` is just a literal character.
    rsDigits = '\\d+',
    rsDingbat = '[' + rsDingbatRange + ']',
    rsLower = '[' + rsLowerRange + ']',
    // Anything that is not a surrogate, break, digit, dingbat, or cased letter.
    rsMisc = '[^' + rsAstralRange + rsBreakRange + rsDigits + rsDingbatRange + rsLowerRange + rsUpperRange + ']',
    // Surrogate pairs for U+1F3FB..U+1F3FF.
    rsFitz = '\\ud83c[\\udffb-\\udfff]',
    rsModifier = '(?:' + rsCombo + '|' + rsFitz + ')',
    rsNonAstral = '[^' + rsAstralRange + ']',
    // Two consecutive surrogate pairs in U+1F1E6..U+1F1FF.
    rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}',
    rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]',
    rsUpper = '[' + rsUpperRange + ']',
    // Zero-width joiner (U+200D), consumed by `rsOptJoin`.
    rsZWJ = '\\u200d';

/** Used to compose unicode regexes. */
var rsLowerMisc = '(?:' + rsLower + '|' + rsMisc + ')',
    rsUpperMisc = '(?:' + rsUpper + '|' + rsMisc + ')',
    reOptMod = rsModifier + '?',
    rsOptVar = '[' + rsVarRange + ']?',
    // Zero or more "ZWJ + symbol" continuations, each with its own optional
    // variation selector and modifier.
    rsOptJoin = '(?:' + rsZWJ + '(?:' + [rsNonAstral, rsRegional, rsSurrPair].join('|') + ')' + rsOptVar + reOptMod + ')*',
    rsSeq = rsOptVar + reOptMod + rsOptJoin,
    rsEmoji = '(?:' + [rsDingbat, rsRegional, rsSurrPair].join('|') + ')' + rsSeq;

/** Used to match non-compound words composed of alphanumeric characters. */
var reBasicWord = /[a-zA-Z0-9]+/g;

/**
 * Used to match complex or compound words.
 *
 * Alternatives are tried in order at each position, so the more specific
 * patterns come first.
 */
var reComplexWord = RegExp([
  // Optionally capitalized lowercase run that ends at a break, an uppercase
  // letter, or the end of the input.
  rsUpper + '?' + rsLower + '+(?=' + [rsBreak, rsUpper, '$'].join('|') + ')',
  // Uppercase/misc run that ends at a break, just before a capitalized word,
  // or at the end of the input.
  rsUpperMisc + '+(?=' + [rsBreak, rsUpper + rsLowerMisc, '$'].join('|') + ')',
  // Optionally capitalized lowercase/misc run with no boundary requirement.
  rsUpper + '?' + rsLowerMisc + '+',
  // Any uppercase run the lookahead-based patterns rejected (e.g. before digits).
  rsUpper + '+',
  rsDigits,
  rsEmoji
].join('|'), 'g');

/** Used to detect strings that need a more robust regexp to match words. */
var reHasComplexWord = /[a-z][A-Z]|[0-9][a-zA-Z]|[a-zA-Z][0-9]|[^a-zA-Z0-9 ]/;
/**
 * Splits `string` into an array of its words.
 *
 * When no `pattern` is given, a cheap alphanumeric regex is used unless the
 * string contains case transitions, letter/digit transitions, or any
 * character other than ASCII alphanumerics and spaces, in which case the
 * compound-word regex is used instead.
 *
 * @param {string} [string=''] The string to inspect.
 * @param {RegExp|string} [pattern] The pattern to match words.
 * @param- {Object} [guard] Enables use as an iteratee for functions like `_.map`.
 * @returns {Array} Returns the words of `string`.
 * @example
 *
 * _.words('fred, barney, & pebbles');
 * // => ['fred', 'barney', 'pebbles']
 *
 * _.words('fred, barney, & pebbles', /[^, ]+/g);
 * // => ['fred', 'barney', '&', 'pebbles']
 */
function words(string, pattern, guard) {
  string = toString(string);
  // As an iteratee, the second argument is an index/key rather than a
  // pattern; a truthy `guard` makes the function ignore it.
  pattern = guard ? undefined : pattern;

  if (pattern === undefined) {
    pattern = reHasComplexWord.test(string) ? reComplexWord : reBasicWord;
  }
  // `match` yields `null` when nothing matches; normalize that to an empty array.
  return string.match(pattern) || [];
}
85 module.exports = words;