Drupal 9: Remove the nodes marked as "noindex" from search API results

4 Dec.2021
Image
Search

In a project where we use the Search API to search for content, we noticed that nodes that are marked as "noindex" by the Metatag module are visible during internal searches.
Here is a ready-made solution for how to avoid this.
hook_entity_update() may be a better fit here than hook_cron(); however, because our site already had many existing nodes, we decided to implement it through cron.
Have a good day, everyone.
Do not forget to leave a comment on how you implemented it.

File
MYMODULE.module
/**
 * Implements hook_cron().
 *
 * Finds nodes whose "field_meta_tags" value contains "noindex" and asks the
 * Search API tracking services to treat them as deleted, so they stop showing
 * up in Search API results. The work is throttled through the
 * 'MYMODULE.cron_next_execution' state key so it runs at most once per hour,
 * regardless of how often cron itself is triggered.
 */
function MYMODULE_cron() {
  $state = \Drupal::state();
  $request_time = \Drupal::time()->getRequestTime();

  // We usually don't want to act every time cron runs (which could be every
  // minute), so the time of the next allowed run is kept in the site state.
  if ($request_time < $state->get('MYMODULE.cron_next_execution', 0)) {
    return;
  }

  // Collect the IDs of all nodes marked as "noindex". The LIKE condition is a
  // plain substring match on the stored meta tags value.
  $query = \Drupal::database()
    ->select('node__field_meta_tags', 'm');
  $query->join('node_field_data', 'n', 'm.entity_id=n.nid');
  $query->addField('n', 'nid');
  $query->condition('m.field_meta_tags_value', '%noindex%', 'LIKE');
  // The join can yield the same nid more than once (presumably one row per
  // translation), so de-duplicate before loading.
  $query->distinct();
  $nids = $query->execute()->fetchCol();

  if (!empty($nids)) {
    $nodes = \Drupal::entityTypeManager()
      ->getStorage('node')
      ->loadMultiple($nids);

    // These services do not change between iterations; fetch them once.
    $tracking_manager = \Drupal::service('search_api.entity_datasource.tracking_manager');
    $tracking_helper = \Drupal::service('search_api.tracking_helper');
    $logger = \Drupal::logger('MYMODULE');

    foreach ($nodes as $node) {
      // Call this hook on behalf of the Content Entity datasource.
      $tracking_manager->entityDelete($node);

      // Attempt to track all items as changed that indexed the entity
      // indirectly.
      $tracking_helper->trackReferencedEntityUpdate($node, TRUE);

      // Loggers take the untranslated message plus placeholder context; the
      // message must not be passed through t() first.
      $logger->notice('Node %title was removed from search index because has "noindex" metatag setting.', ['%title' => $node->label()]);
    }
  }

  // Schedule the next run even when nothing matched; otherwise the throttle
  // would never engage and the query above would run on every cron call.
  $state->set('MYMODULE.cron_next_execution', $request_time + 3600);
}