3 namespace Drupal\search;
5 use Drupal\Core\Database\Query\Condition;
6 use Drupal\Core\Database\Query\SelectExtender;
7 use Drupal\Core\Database\Query\SelectInterface;
10 * Search query extender and helper functions.
12 * Performs a query on the full-text search index for a word or words.
14 * This query is used by search plugins that use the search index (not all
15 * search plugins do, as some use a different searching mechanism). It
16 * assumes you have set up a query on the {search_index} table with alias 'i',
 * and will only work if the user is searching for at least one "positive"
 * keyword or phrase.
20 * For efficiency, users of this query can run the prepareAndNormalize()
21 * method to figure out if there are any search results, before fully setting
22 * up and calling execute() to execute the query. The scoring expressions are
23 * not needed until the execute() step. However, it's not really necessary
24 * to do this, because this class's execute() method does that anyway.
26 * During both the prepareAndNormalize() and execute() steps, there can be
27 * problems. Call getStatus() to figure out if the query is OK or not.
29 * The query object is given the tag 'search_$type' and can be further
30 * extended with hook_query_alter().
32 class SearchQuery extends SelectExtender {
/**
 * Indicates no positive keywords were in the search expression.
 *
 * Positive keywords are words that are searched for, as opposed to negative
 * keywords, which are words that are excluded. To count as a keyword, a
 * word must be at least
 * \Drupal::config('search.settings')->get('index.minimum_word_size')
 * characters long, or be numeric.
 *
 * This and the other status constants are bit flags that are OR-ed together
 * into the status bitmap.
 *
 * @see SearchQuery::getStatus()
 */
const NO_POSITIVE_KEYWORDS = 1;

/**
 * Indicates that part of the search expression was ignored.
 *
 * To prevent Denial of Service attacks, only
 * \Drupal::config('search.settings')->get('and_or_limit') expressions
 * (positive keywords, phrases, negative keywords) are allowed; this flag
 * indicates that expressions existed past that limit and they were removed.
 *
 * @see SearchQuery::getStatus()
 */
const EXPRESSIONS_IGNORED = 2;

/**
 * Indicates that lower-case "or" was in the search expression.
 *
 * The word "or" in lower case was found in the search expression. This
 * probably means someone was trying to do an OR search but used lower-case
 * instead of upper-case.
 *
 * @see SearchQuery::getStatus()
 */
const LOWER_CASE_OR = 4;

/**
 * Indicates that no positive keyword matches were found.
 *
 * @see SearchQuery::getStatus()
 */
const NO_KEYWORD_MATCHES = 8;
/**
 * The keywords and advanced search options that are entered by the user.
 *
 * @var string
 */
protected $searchExpression;

/**
 * The type of search (search type).
 *
 * This maps to the value of the type column in search_index, and is usually
 * equal to the machine-readable name of the plugin or the search page.
 *
 * @var string
 */
protected $type;

/**
 * Parsed-out positive and negative search keys.
 *
 * Each positive entry is either a single keyword string (AND-ed) or an
 * array of keyword strings (a group of OR-ed keywords).
 *
 * @var array
 */
protected $keys = ['positive' => [], 'negative' => []];

/**
 * Indicates whether the query conditions are simple or complex (LIKE).
 *
 * @var bool
 */
protected $simple = TRUE;

/**
 * Conditions that are used for exact searches.
 *
 * This is always used for the second step in the query, but is not part of
 * the preparation step unless $this->simple is FALSE.
 *
 * @var \Drupal\Core\Database\Query\Condition
 */
protected $conditions;

/**
 * Indicates how many matches for a search query are necessary.
 *
 * @var int
 */
protected $matches = 0;

/**
 * Array of positive search words.
 *
 * These words have to match against {search_index}.word. The array is keyed
 * by the word itself, so each word appears only once.
 *
 * @var array
 */
protected $words = [];

/**
 * Multiplier to normalize the keyword score.
 *
 * This value is calculated by the preparation step, and is used as a
 * multiplier of the word scores to make sure they are between 0 and 1.
 *
 * @var float
 */
protected $normalize = 0;

/**
 * Indicates whether the preparation step has been executed.
 *
 * @var bool
 */
protected $executedPrepare = FALSE;

/**
 * A bitmap of status conditions, described in getStatus().
 *
 * @var int
 *
 * @see SearchQuery::getStatus()
 */
protected $status = 0;

/**
 * The word score expressions.
 *
 * @var array
 *
 * @see SearchQuery::addScore()
 */
protected $scores = [];

/**
 * Arguments for the score expressions.
 *
 * @var array
 */
protected $scoresArguments = [];

/**
 * The number of 'i.relevance' occurrences in score expressions.
 *
 * @var int
 */
protected $relevance_count = 0;

/**
 * Multipliers for score expressions.
 *
 * @var array
 */
protected $multiply = [];
/**
 * Sets the search query expression.
 *
 * @param string $expression
 *   A search string, which can contain keywords and options.
 * @param string $type
 *   The search type. This maps to {search_index}.type in the database.
 *
 * @return $this
 */
public function searchExpression($expression, $type) {
  $this->searchExpression = $expression;
  // Remember the type: prepareAndNormalize() filters the index on it.
  $this->type = $type;

  // Tag the query as 'search_$type' so it can be altered by other code.
  $this->addTag('search_' . $type);

  // Initialize conditions and status.
  $this->conditions = new Condition('AND');
  $this->status = 0;

  return $this;
}
/**
 * Parses the search query into SQL conditions.
 *
 * Sets up the following variables:
 * - $this->keys
 * - $this->words
 * - $this->conditions
 * - $this->simple
 * - $this->matches
 *
 * May also set the EXPRESSIONS_IGNORED and LOWER_CASE_OR status flags.
 */
protected function parseSearchExpression() {
  // Matches words optionally prefixed by a - sign. A word in this case is
  // something between two spaces, optionally quoted.
  preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' ' . $this->searchExpression, $keywords, PREG_SET_ORDER);

  if (count($keywords) == 0) {
    return;
  }

  // Classify tokens.
  $in_or = FALSE;
  $limit_combinations = \Drupal::config('search.settings')->get('and_or_limit');
  // The first search expression does not count as AND.
  $and_count = -1;
  $or_count = 0;
  foreach ($keywords as $match) {
    // Note that the limit is only enforced once at least one OR was seen.
    if ($or_count && $and_count + $or_count >= $limit_combinations) {
      // Ignore all further search expressions to prevent Denial-of-Service
      // attacks using a high number of AND/OR combinations.
      $this->status |= SearchQuery::EXPRESSIONS_IGNORED;
      break;
    }

    // Strip off phrase quotes. The pattern above guarantees that $match[2]
    // is never empty, so reading its first character is safe. Square
    // brackets are used because the curly-brace offset syntax was removed
    // in PHP 8.
    $phrase = FALSE;
    if ($match[2][0] == '"') {
      $match[2] = substr($match[2], 1, -1);
      $phrase = TRUE;
      $this->simple = FALSE;
    }

    // Simplify keyword according to indexing rules and external
    // preprocessors. Use same process as during search indexing, so it
    // will match search index.
    $words = search_simplify($match[2]);
    // Re-explode in case simplification added more words, except when
    // matching a phrase.
    $words = $phrase ? [$words] : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY);

    // Negative matches.
    if ($match[1] == '-') {
      $this->keys['negative'] = array_merge($this->keys['negative'], $words);
    }
    // OR operator: instead of a single keyword, we store an array of all
    // OR'd keywords.
    elseif ($match[2] == 'OR' && count($this->keys['positive'])) {
      $last = array_pop($this->keys['positive']);
      // Starting a new OR?
      if (!is_array($last)) {
        $last = [$last];
      }
      $this->keys['positive'][] = $last;
      $in_or = TRUE;
      $or_count++;
      continue;
    }
    // AND operator: implied, so just ignore it.
    elseif ($match[2] == 'AND' || $match[2] == 'and') {
      continue;
    }
    // Plain keyword.
    else {
      if ($match[2] == 'or') {
        // Lower-case "or" instead of "OR" is a warning condition.
        $this->status |= SearchQuery::LOWER_CASE_OR;
      }
      if ($in_or) {
        // Add to last element (which is an array).
        $last_index = count($this->keys['positive']) - 1;
        $this->keys['positive'][$last_index] = array_merge($this->keys['positive'][$last_index], $words);
      }
      else {
        $this->keys['positive'] = array_merge($this->keys['positive'], $words);
        $and_count++;
      }
    }
    // Only the keyword directly following an OR joins the OR group.
    $in_or = FALSE;
  }

  // Convert keywords into SQL statements.
  $has_and = FALSE;
  $has_or = FALSE;
  // Positive matches.
  foreach ($this->keys['positive'] as $key) {
    // Group of ORed terms.
    if (is_array($key) && count($key)) {
      // If we had already found one OR, this is another one AND-ed with the
      // first, meaning it is not a simple query.
      if ($has_or) {
        $this->simple = FALSE;
      }
      $has_or = TRUE;
      $has_new_scores = FALSE;
      $queryor = new Condition('OR');
      foreach ($key as $or) {
        list($num_new_scores) = $this->parseWord($or);
        $has_new_scores = $has_new_scores || $num_new_scores > 0;
        $queryor->condition('d.data', "% $or %", 'LIKE');
      }
      if (count($queryor)) {
        $this->conditions->condition($queryor);
        // A group of OR keywords only needs to match once.
        $this->matches += $has_new_scores ? 1 : 0;
      }
    }
    // Single ANDed term.
    else {
      $has_and = TRUE;
      list($num_new_scores, $num_valid_words) = $this->parseWord($key);
      $this->conditions->condition('d.data', "% $key %", 'LIKE');
      if (!$num_valid_words) {
        $this->simple = FALSE;
      }
      // Each AND keyword needs to match at least once.
      $this->matches += $num_new_scores;
    }
  }
  if ($has_and && $has_or) {
    $this->simple = FALSE;
  }

  // Negative matches.
  foreach ($this->keys['negative'] as $key) {
    $this->conditions->condition('d.data', "% $key %", 'NOT LIKE');
    $this->simple = FALSE;
  }
}
/**
 * Parses a word or phrase for parseSearchExpression().
 *
 * Splits a phrase into words. Adds its words to $this->words, if it is not
 * already there. A word is only considered valid if it is numeric or at
 * least \Drupal::config('search.settings')->get('index.minimum_word_size')
 * characters long.
 *
 * @param string $word
 *   A single word, or a phrase whose words are separated by spaces.
 *
 * @return array
 *   A list containing the number of new words found, and the total number of
 *   valid words in the phrase.
 */
protected function parseWord($word) {
  $num_new_scores = 0;
  $num_valid_words = 0;

  // The minimum size does not change between words, so look it up once.
  $minimum_word_size = \Drupal::config('search.settings')->get('index.minimum_word_size');

  // Determine the scorewords of this word/phrase.
  foreach (explode(' ', $word) as $s) {
    if (is_numeric($s) || mb_strlen($s) >= $minimum_word_size) {
      if (!isset($this->words[$s])) {
        $this->words[$s] = $s;
        $num_new_scores++;
      }
      $num_valid_words++;
    }
  }

  // Return the number of added words and the number of valid words.
  return [$num_new_scores, $num_valid_words];
}
/**
 * Prepares the query and calculates the normalization factor.
 *
 * After the query is normalized the keywords are weighted to give the results
 * a relevancy score. The query is ready for execution after this.
 *
 * Error and warning conditions can apply. Call getStatus() after calling
 * this method to retrieve them.
 *
 * @return bool
 *   TRUE if at least one keyword matched the search index; FALSE if not.
 */
public function prepareAndNormalize() {
  $this->parseSearchExpression();
  $this->executedPrepare = TRUE;

  if (count($this->words) == 0) {
    // Although the query could proceed, there is no point in joining
    // with other tables and attempting to normalize if there are no
    // keywords present.
    $this->status |= SearchQuery::NO_POSITIVE_KEYWORDS;
    return FALSE;
  }

  // Build the basic search query: match the entered keywords.
  $or = new Condition('OR');
  foreach ($this->words as $word) {
    $or->condition('i.word', $word);
  }
  $this->condition($or);

  // Add keyword normalization information to the query.
  $this->join('search_total', 't', 'i.word = t.word');
  $this
    ->condition('i.type', $this->type)
    ->groupBy('i.type')
    ->groupBy('i.sid');

  // If the query is simple, we should have calculated the number of
  // matching words we need to find, so impose that criterion. For non-
  // simple queries, this condition could lead to incorrectly deciding not
  // to continue with the full query.
  if ($this->simple) {
    $this->having('COUNT(*) >= :matches', [':matches' => $this->matches]);
  }

  // Clone the query object to calculate normalization.
  $normalize_query = clone $this->query;

  // For complex search queries, add the LIKE conditions; if the query is
  // simple, we do not need them for normalization.
  if (!$this->simple) {
    $normalize_query->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type AND i.langcode = d.langcode');
    if (count($this->conditions)) {
      $normalize_query->condition($this->conditions);
    }
  }

  // Calculate normalization, which is the max of all the search scores for
  // positive keywords in the query. And note that the query could have other
  // fields added to it by the user of this extension.
  $normalize_query->addExpression('SUM(i.score * t.count)', 'calculated_score');
  $result = $normalize_query
    ->range(0, 1)
    ->orderBy('calculated_score', 'DESC')
    ->execute()
    ->fetchObject();
  if (isset($result->calculated_score)) {
    $this->normalize = (float) $result->calculated_score;
  }

  if ($this->normalize) {
    return TRUE;
  }

  // If the normalization value was zero, that indicates there were no
  // matches to the supplied positive keywords.
  $this->status |= SearchQuery::NO_KEYWORD_MATCHES;
  return FALSE;
}
/**
 * Runs the preparation step, if still needed, before the generic pre-execute.
 *
 * @param \Drupal\Core\Database\Query\SelectInterface|null $query
 *   The query to pass on to the parent implementation.
 *
 * @return bool
 *   FALSE if the normalization factor is zero, meaning there is nothing to
 *   search for; otherwise whatever the parent implementation returns.
 */
public function preExecute(SelectInterface $query = NULL) {
  if (!$this->executedPrepare) {
    $this->prepareAndNormalize();
  }

  // Without a normalization factor there were no keyword matches, so the
  // query must not be executed.
  if (!$this->normalize) {
    return FALSE;
  }

  return parent::preExecute($query);
}
/**
 * Adds a custom score expression to the search query.
 *
 * Score expressions are used to order search results. If no calls to
 * addScore() have taken place, a default keyword relevance score will be
 * used. However, if at least one call to addScore() has taken place, the
 * keyword relevance score is not automatically added.
 *
 * Note that you must use this method to add ordering to your searches, and
 * not call orderBy() directly, when using the SearchQuery extender. This is
 * because of the two-pass system the SearchQuery class uses to normalize
 * keyword scores.
 *
 * @param string $score
 *   The score expression, which should evaluate to a number between 0 and 1.
 *   The string 'i.relevance' in a score expression will be replaced by a
 *   measure of keyword relevance between 0 and 1.
 * @param array $arguments
 *   Query arguments needed to provide values to the score expression.
 * @param float|false $multiply
 *   If set, the score is multiplied with this value. However, all scores
 *   with multipliers are then divided by the total of all multipliers, so
 *   that overall, the normalization is maintained.
 *
 * @return $this
 */
public function addScore($score, $arguments = [], $multiply = FALSE) {
  // Only wrap the expression when a multiplier was given; otherwise the
  // divisor added in execute() would include a zero-valued entry.
  if ($multiply) {
    $i = count($this->multiply);
    // Modify the score expression so it is multiplied by the multiplier,
    // with a divisor to renormalize. Note that the ROUND here is necessary
    // for PostgreSQL and SQLite in order to ensure that the :multiply_* and
    // :total_* arguments are treated as a numeric type, because the
    // PostgreSQL PDO driver sometimes puts values in as strings instead of
    // numbers in complex expressions like this.
    $score = "(ROUND(:multiply_$i, 4)) * COALESCE(($score), 0) / (ROUND(:total_$i, 4))";
    // Add an argument for the multiplier. The :total_$i argument is taken
    // care of in the execute() method, which is when the total divisor is
    // calculated.
    $arguments[':multiply_' . $i] = $multiply;
    $this->multiply[] = $multiply;
  }

  // Search scoring needs a way to include a keyword relevance in the score.
  // For historical reasons, this is done by putting 'i.relevance' into the
  // search expression. So, use string replacement to change this to a
  // calculated query expression, counting the number of occurrences so
  // in the execute() method we can add arguments. Occurrences are replaced
  // one at a time because each needs its own numbered placeholder.
  while (strpos($score, 'i.relevance') !== FALSE) {
    $pieces = explode('i.relevance', $score, 2);
    $score = implode('((ROUND(:normalization_' . $this->relevance_count . ', 4)) * i.score * t.count)', $pieces);
    $this->relevance_count++;
  }

  $this->scores[] = $score;
  $this->scoresArguments += $arguments;

  return $this;
}
/**
 * Executes the search.
 *
 * The complex conditions are applied to the query including score
 * expressions and ordering.
 *
 * Error and warning conditions can apply. Call getStatus() after calling
 * this method to retrieve them.
 *
 * @return \Drupal\Core\Database\StatementInterface|null
 *   A query result set containing the results of the query, or NULL if the
 *   pre-execute step indicated that the query should not run.
 */
public function execute() {
  if (!$this->preExecute($this)) {
    return NULL;
  }

  // Add conditions to the query.
  $this->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type AND i.langcode = d.langcode');
  if (count($this->conditions)) {
    $this->condition($this->conditions);
  }

  // Add default score (keyword relevance) if there are not any defined.
  if (empty($this->scores)) {
    $this->addScore('i.relevance');
  }

  if (count($this->multiply)) {
    // Re-normalize scores with multipliers by dividing by the total of all
    // multipliers. The expressions were altered in addScore(), so here just
    // add the arguments for the total.
    $sum = array_sum($this->multiply);
    foreach (array_keys($this->multiply) as $i) {
      $this->scoresArguments[':total_' . $i] = $sum;
    }
  }

  // Add arguments for the keyword relevance normalization number. The
  // normalization factor is known to be non-zero here, because preExecute()
  // returns FALSE otherwise.
  $normalization = 1.0 / $this->normalize;
  for ($i = 0; $i < $this->relevance_count; $i++) {
    $this->scoresArguments[':normalization_' . $i] = $normalization;
  }

  // Add all scores together to form a query field.
  $this->addExpression('SUM(' . implode(' + ', $this->scores) . ')', 'calculated_score', $this->scoresArguments);

  // If an order has not yet been set for this query, add a default order
  // that sorts by the calculated sum of scores.
  if (count($this->getOrderBy()) == 0) {
    $this->orderBy('calculated_score', 'DESC');
  }

  // Add query metadata.
  $this
    ->addMetaData('normalize', $this->normalize)
    ->fields('i', ['type', 'sid']);
  return $this->query->execute();
}
/**
 * Builds the default count query for SearchQuery.
 *
 * Since SearchQuery always uses GROUP BY, we can default to a subquery. We
 * also add the same conditions as execute() because countQuery() is called
 * first.
 *
 * @return \Drupal\Core\Database\Query\SelectInterface
 *   A query that counts the rows of the inner search query.
 */
public function countQuery() {
  if (!$this->executedPrepare) {
    $this->prepareAndNormalize();
  }

  // Clone the inner query.
  $inner = clone $this->query;

  // Add conditions to query. The join must match the one used in execute(),
  // including the language code, so that the count agrees with the number
  // of rows the search itself returns.
  $inner->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type AND i.langcode = d.langcode');
  if (count($this->conditions)) {
    $inner->condition($this->conditions);
  }

  // Remove existing fields and expressions, they are not needed for a count
  // query. Both getters return references, so assigning empties them on the
  // cloned query itself.
  $fields =& $inner->getFields();
  $fields = [];
  $expressions =& $inner->getExpressions();
  $expressions = [];

  // Add sid as the only field and count them as a subquery.
  $count = db_select($inner->fields('i', ['sid']), NULL, ['target' => 'replica']);

  // Add the COUNT() expression.
  $count->addExpression('COUNT(*)');

  return $count;
}
/**
 * Gets the status flags collected while preparing and running the query.
 *
 * @return int
 *   The status bitmap. A value of zero means that no problems were found.
 *   Any other value is one or more of these flags combined:
 *   - SearchQuery::NO_POSITIVE_KEYWORDS
 *   - SearchQuery::EXPRESSIONS_IGNORED
 *   - SearchQuery::LOWER_CASE_OR
 *   - SearchQuery::NO_KEYWORD_MATCHES
 */
public function getStatus() {
  return $this->status;
}