Source: includes/classes/Indexable/Post/Post.php

  1. <?php
  2. /**
  3. * Post indexable
  4. *
  5. * @since 3.0
  6. * @package elasticpress
  7. */
  8. namespace ElasticPress\Indexable\Post;
  9. use WP_Query;
  10. use WP_User;
  11. use ElasticPress\Elasticsearch;
  12. use ElasticPress\Indexable;
  13. if ( ! defined( 'ABSPATH' ) ) {
  14. // @codeCoverageIgnoreStart
  15. exit; // Exit if accessed directly.
  16. // @codeCoverageIgnoreEnd
  17. }
  18. /**
  19. * Post indexable class
  20. */
  21. class Post extends Indexable {
  22. /**
  23. * Indexable slug used for identification
  24. *
  25. * @var string
  26. * @since 3.0
  27. */
  28. public $slug = 'post';
  29. /**
  30. * Flag to indicate if the indexable has support for
  31. * `id_range` pagination method during a sync.
  32. *
  33. * @var boolean
  34. * @since 4.1.0
  35. */
  36. public $support_indexing_advanced_pagination = true;
  37. /**
  38. * Create indexable and initialize dependencies
  39. *
  40. * @since 3.0
  41. */
  42. public function __construct() {
  43. $this->labels = [
  44. 'plural' => esc_html__( 'Posts', 'elasticpress' ),
  45. 'singular' => esc_html__( 'Post', 'elasticpress' ),
  46. ];
  47. $this->sync_manager = new SyncManager( $this->slug );
  48. $this->query_integration = new QueryIntegration( $this->slug );
  49. }
  50. /**
  51. * Query database for posts
  52. *
  53. * @param array $args Query DB args
  54. * @since 3.0
  55. * @return array
  56. */
  57. public function query_db( $args ) {
  58. $defaults = [
  59. 'posts_per_page' => $this->get_bulk_items_per_page(),
  60. 'post_type' => $this->get_indexable_post_types(),
  61. 'post_status' => $this->get_indexable_post_status(),
  62. 'offset' => 0,
  63. 'ignore_sticky_posts' => true,
  64. 'orderby' => 'ID',
  65. 'order' => 'desc',
  66. 'no_found_rows' => false,
  67. 'ep_indexing_advanced_pagination' => true,
  68. 'has_password' => false,
  69. ];
  70. if ( isset( $args['per_page'] ) ) {
  71. $args['posts_per_page'] = $args['per_page'];
  72. }
  73. if ( isset( $args['include'] ) ) {
  74. $args['post__in'] = $args['include'];
  75. }
  76. if ( isset( $args['exclude'] ) ) {
  77. $args['post__not_in'] = $args['exclude'];
  78. }
  79. /**
  80. * Filter arguments used to query posts from database
  81. *
  82. * @hook ep_post_query_db_args
  83. * @param {array} $args Database arguments
  84. * @return {array} New arguments
  85. */
  86. $args = apply_filters( 'ep_index_posts_args', apply_filters( 'ep_post_query_db_args', wp_parse_args( $args, $defaults ) ) );
  87. if ( isset( $args['post__in'] ) || 0 < $args['offset'] ) {
  88. // Disable advanced pagination. Not useful if only indexing specific IDs.
  89. $args['ep_indexing_advanced_pagination'] = false;
  90. }
  91. // Enforce the following query args during advanced pagination to ensure things work correctly.
  92. if ( $args['ep_indexing_advanced_pagination'] ) {
  93. $args = array_merge(
  94. $args,
  95. [
  96. 'suppress_filters' => false,
  97. 'orderby' => 'ID',
  98. 'order' => 'DESC',
  99. 'paged' => 1,
  100. 'offset' => 0,
  101. 'no_found_rows' => true,
  102. ]
  103. );
  104. add_filter( 'posts_where', array( $this, 'bulk_indexing_filter_posts_where' ), 9999, 2 );
  105. $query = new WP_Query( $args );
  106. $total_objects = $this->get_total_objects_for_query( $args );
  107. remove_filter( 'posts_where', array( $this, 'bulk_indexing_filter_posts_where' ), 9999, 2 );
  108. } else {
  109. $query = new WP_Query( $args );
  110. $total_objects = $query->found_posts;
  111. }
  112. return [
  113. 'objects' => $query->posts,
  114. 'total_objects' => $total_objects,
  115. ];
  116. }
  117. /**
  118. * Manipulate the WHERE clause of the bulk indexing query to paginate by ID in order to avoid performance issues with SQL offset.
  119. *
  120. * @param string $where The current $where clause.
  121. * @param WP_Query $query WP_Query object.
  122. * @return string WHERE clause with our pagination added if needed.
  123. */
  124. public function bulk_indexing_filter_posts_where( $where, $query ) {
  125. $using_advanced_pagination = $query->get( 'ep_indexing_advanced_pagination', false );
  126. if ( $using_advanced_pagination ) {
  127. $requested_upper_limit_id = $query->get( 'ep_indexing_upper_limit_object_id', PHP_INT_MAX );
  128. $requested_lower_limit_post_id = $query->get( 'ep_indexing_lower_limit_object_id', 0 );
  129. $last_processed_id = $query->get( 'ep_indexing_last_processed_object_id', null );
  130. // On the first loopthrough we begin with the requested upper limit ID. Afterwards, use the last processed ID to paginate.
  131. $upper_limit_range_post_id = $requested_upper_limit_id;
  132. if ( is_numeric( $last_processed_id ) ) {
  133. $upper_limit_range_post_id = $last_processed_id - 1;
  134. }
  135. // Sanitize. Abort if unexpected data at this point.
  136. if ( ! is_numeric( $upper_limit_range_post_id ) || ! is_numeric( $requested_lower_limit_post_id ) ) {
  137. return $where;
  138. }
  139. $range = [
  140. 'upper_limit' => "{$GLOBALS['wpdb']->posts}.ID <= {$upper_limit_range_post_id}",
  141. 'lower_limit' => "{$GLOBALS['wpdb']->posts}.ID >= {$requested_lower_limit_post_id}",
  142. ];
  143. // Skip the end range if it's unnecessary.
  144. $skip_ending_range = 0 === $requested_lower_limit_post_id;
  145. $where = $skip_ending_range ? "AND {$range['upper_limit']} {$where}" : "AND {$range['upper_limit']} AND {$range['lower_limit']} {$where}";
  146. }
  147. return $where;
  148. }
  149. /**
  150. * Get SQL_CALC_FOUND_ROWS for a specific query based on it's args.
  151. *
  152. * @param array $query_args The query args.
  153. * @return int The query result's found_posts.
  154. */
  155. protected function get_total_objects_for_query( $query_args ) {
  156. static $object_counts = [];
  157. // Reset the pagination-related args for optimal caching.
  158. $normalized_query_args = array_merge(
  159. $query_args,
  160. [
  161. 'offset' => 0,
  162. 'paged' => 1,
  163. 'posts_per_page' => 1,
  164. 'no_found_rows' => false,
  165. 'ep_indexing_last_processed_object_id' => null,
  166. ]
  167. );
  168. $cache_key = md5( get_current_blog_id() . wp_json_encode( $normalized_query_args ) );
  169. if ( ! isset( $object_counts[ $cache_key ] ) ) {
  170. $object_counts[ $cache_key ] = ( new WP_Query( $normalized_query_args ) )->found_posts;
  171. }
  172. if ( 0 === $object_counts[ $cache_key ] ) {
  173. // Do a DB count to make sure the query didn't just die and return 0.
  174. $db_post_count = $this->get_total_objects_for_query_from_db( $normalized_query_args );
  175. if ( $db_post_count !== $object_counts[ $cache_key ] ) {
  176. $object_counts[ $cache_key ] = $db_post_count;
  177. }
  178. }
  179. return $object_counts[ $cache_key ];
  180. }
  181. /**
  182. * Get total posts from DB for a specific query based on it's args.
  183. *
  184. * @param array $query_args The query args.
  185. * @since 4.0.0
  186. * @return int The total posts.
  187. */
  188. protected function get_total_objects_for_query_from_db( $query_args ) {
  189. global $wpdb;
  190. $post_count = 0;
  191. if ( ! isset( $query_args['post_type'] ) || isset( $query_args['ep_indexing_upper_limit_object_id'] )
  192. || isset( $query_args['ep_indexing_lower_limit_object_id'] ) ) {
  193. return $post_count;
  194. }
  195. foreach ( $query_args['post_type'] as $post_type ) {
  196. $post_counts_by_post_status = wp_count_posts( $post_type );
  197. foreach ( $post_counts_by_post_status as $post_status => $post_status_count ) {
  198. if ( ! in_array( $post_status, $query_args['post_status'], true ) ) {
  199. continue;
  200. }
  201. $post_count += $post_status_count;
  202. }
  203. }
  204. /**
  205. * As `wp_count_posts` will also count posts with password, we need to remove
  206. * them from the final count if they will not be used.
  207. *
  208. * The if below will pass if `has_password` is false but not null.
  209. */
  210. if ( isset( $query_args['has_password'] ) && ! $query_args['has_password'] ) {
  211. $posts_with_password = (int) $wpdb->get_var( "SELECT COUNT(1) AS posts_with_password FROM {$wpdb->posts} WHERE post_password != ''" ); // phpcs:ignore WordPress.DB.DirectDatabaseQuery
  212. $post_count -= $posts_with_password;
  213. }
  214. return $post_count;
  215. }
  216. /**
  217. * Returns indexable post types for the current site
  218. *
  219. * @since 0.9
  220. * @return mixed|void
  221. */
  222. public function get_indexable_post_types() {
  223. $post_types = get_post_types( array( 'public' => true ) );
  224. /**
  225. * Remove attachments by default
  226. *
  227. * @since 3.0
  228. */
  229. unset( $post_types['attachment'] );
  230. /**
  231. * Filter indexable post types
  232. *
  233. * @hook ep_indexable_post_types
  234. * @param {array} $post_types Indexable post types
  235. * @return {array} New post types
  236. */
  237. return apply_filters( 'ep_indexable_post_types', $post_types );
  238. }
  239. /**
  240. * Return indexable post_status for the current site
  241. *
  242. * @since 1.3
  243. * @return array
  244. */
  245. public function get_indexable_post_status() {
  246. /**
  247. * Filter indexable post statuses
  248. *
  249. * @hook ep_indexable_post_status
  250. * @param {array} $post_statuses Indexable post statuses
  251. * @return {array} New post statuses
  252. */
  253. return apply_filters( 'ep_indexable_post_status', array( 'publish' ) );
  254. }
  255. /**
  256. * Determine required mapping file
  257. *
  258. * @since 3.6.2
  259. * @return string
  260. */
  261. public function get_mapping_name() {
  262. $es_version = Elasticsearch::factory()->get_elasticsearch_version();
  263. if ( empty( $es_version ) ) {
  264. /**
  265. * Filter fallback Elasticsearch version
  266. *
  267. * @hook ep_fallback_elasticsearch_version
  268. * @param {string} $version Fall back Elasticsearch version
  269. * @return {string} New version
  270. */
  271. $es_version = apply_filters( 'ep_fallback_elasticsearch_version', '2.0' );
  272. }
  273. $es_version = (string) $es_version;
  274. $mapping_file = '7-0.php';
  275. if ( version_compare( $es_version, '7.0', '<' ) ) {
  276. $mapping_file = '5-2.php';
  277. }
  278. return apply_filters( 'ep_post_mapping_version', $mapping_file );
  279. }
  280. /**
  281. * Generate the mapping array
  282. *
  283. * @since 4.1.0
  284. * @return array
  285. */
  286. public function generate_mapping() {
  287. $mapping_file = $this->get_mapping_name();
  288. /**
  289. * Filter post indexable mapping file
  290. *
  291. * @hook ep_post_mapping_file
  292. * @param {string} $file Path to file
  293. * @return {string} New file path
  294. */
  295. $mapping = require apply_filters( 'ep_post_mapping_file', __DIR__ . '/../../../mappings/post/' . $mapping_file );
  296. /**
  297. * Filter post indexable mapping
  298. *
  299. * @hook ep_post_mapping
  300. * @param {array} $mapping Mapping
  301. * @return {array} New mapping
  302. */
  303. $mapping = apply_filters( 'ep_post_mapping', $mapping );
  304. delete_transient( 'ep_post_mapping_version' );
  305. return $mapping;
  306. }
  307. /**
  308. * Determine version of mapping currently on the post index.
  309. *
  310. * @since 3.6.2
  311. * @return string|WP_Error|false $version
  312. */
  313. public function determine_mapping_version() {
  314. $version = get_transient( 'ep_post_mapping_version' );
  315. if ( empty( $version ) ) {
  316. $index = $this->get_index_name();
  317. $mapping = Elasticsearch::factory()->get_mapping( $index );
  318. if ( empty( $mapping ) ) {
  319. return new \WP_Error( 'ep_failed_mapping_version', esc_html__( 'Error while fetching the mapping version.', 'elasticpress' ) );
  320. }
  321. if ( ! isset( $mapping[ $index ] ) ) {
  322. return false;
  323. }
  324. $version = $this->determine_mapping_version_based_on_existing( $mapping, $index );
  325. set_transient(
  326. 'ep_post_mapping_version',
  327. $version,
  328. /**
  329. * Filter the post mapping version cache expiration.
  330. *
  331. * @hook ep_post_mapping_version_cache_expiration
  332. * @since 3.6.5
  333. * @param {int} $version Time in seconds for the transient expiration
  334. * @return {int} New time
  335. */
  336. apply_filters( 'ep_post_mapping_version_cache_expiration', DAY_IN_SECONDS )
  337. );
  338. }
  339. /**
  340. * Filter the mapping version for posts.
  341. *
  342. * @hook ep_post_mapping_version_determined
  343. * @since 3.6.2
  344. * @param {string} $version Determined version string
  345. * @return {string} New version string
  346. */
  347. return apply_filters( 'ep_post_mapping_version_determined', $version );
  348. }
  349. /**
  350. * Prepare a post for syncing
  351. *
  352. * @param int $post_id Post ID.
  353. * @since 0.9.1
  354. * @return bool|array
  355. */
  356. public function prepare_document( $post_id ) {
  357. global $post;
  358. $post = get_post( $post_id );
  359. setup_postdata( $post );
  360. if ( empty( $post ) ) {
  361. return false;
  362. }
  363. $user = get_userdata( $post->post_author );
  364. if ( $user instanceof WP_User ) {
  365. $user_data = array(
  366. 'raw' => $user->user_login,
  367. 'login' => $user->user_login,
  368. 'display_name' => $user->display_name,
  369. 'id' => $user->ID,
  370. );
  371. } else {
  372. $user_data = array(
  373. 'raw' => '',
  374. 'login' => '',
  375. 'display_name' => '',
  376. 'id' => '',
  377. );
  378. }
  379. $post_date = $post->post_date;
  380. $post_date_gmt = $post->post_date_gmt;
  381. $post_modified = $post->post_modified;
  382. $post_modified_gmt = $post->post_modified_gmt;
  383. $comment_count = absint( $post->comment_count );
  384. $comment_status = $post->comment_status;
  385. $ping_status = $post->ping_status;
  386. $menu_order = (int) $post->menu_order;
  387. /**
  388. * Filter to ignore invalid dates
  389. *
  390. * @hook ep_ignore_invalid_dates
  391. * @param {bool} $ignore True to ignore
  392. * @param {int} $post_id Post ID
  393. * @param {WP_Post} $post Post object
  394. * @return {bool} New ignore value
  395. */
  396. if ( apply_filters( 'ep_ignore_invalid_dates', true, $post_id, $post ) ) {
  397. if ( ! strtotime( $post_date ) || '0000-00-00 00:00:00' === $post_date ) {
  398. $post_date = null;
  399. }
  400. if ( ! strtotime( $post_date_gmt ) || '0000-00-00 00:00:00' === $post_date_gmt ) {
  401. $post_date_gmt = null;
  402. }
  403. if ( ! strtotime( $post_modified ) || '0000-00-00 00:00:00' === $post_modified ) {
  404. $post_modified = null;
  405. }
  406. if ( ! strtotime( $post_modified_gmt ) || '0000-00-00 00:00:00' === $post_modified_gmt ) {
  407. $post_modified_gmt = null;
  408. }
  409. }
  410. // To prevent infinite loop, we don't queue when updated_postmeta.
  411. remove_action( 'updated_postmeta', [ $this->sync_manager, 'action_queue_meta_sync' ], 10 );
  412. /**
  413. * Filter to allow indexing of filtered post content
  414. *
  415. * @hook ep_allow_post_content_filtered_index
  416. * @param {bool} $ignore True to allow
  417. * @return {bool} New value
  418. */
  419. $post_content_filtered_allowed = apply_filters( 'ep_allow_post_content_filtered_index', true );
  420. $post_args = array(
  421. 'post_id' => $post_id,
  422. 'ID' => $post_id,
  423. 'post_author' => $user_data,
  424. 'post_date' => $post_date,
  425. 'post_date_gmt' => $post_date_gmt,
  426. 'post_title' => $post->post_title,
  427. 'post_excerpt' => $post->post_excerpt,
  428. 'post_content_filtered' => $post_content_filtered_allowed ? apply_filters( 'the_content', $post->post_content ) : '',
  429. 'post_content' => $post->post_content,
  430. 'post_status' => $post->post_status,
  431. 'post_name' => $post->post_name,
  432. 'post_modified' => $post_modified,
  433. 'post_modified_gmt' => $post_modified_gmt,
  434. 'post_parent' => $post->post_parent,
  435. 'post_type' => $post->post_type,
  436. 'post_mime_type' => $post->post_mime_type,
  437. 'permalink' => get_permalink( $post_id ),
  438. 'terms' => $this->prepare_terms( $post ),
  439. 'meta' => $this->prepare_meta_types( $this->prepare_meta( $post ) ), // post_meta removed in 2.4.
  440. 'date_terms' => $this->prepare_date_terms( $post_date ),
  441. 'comment_count' => $comment_count,
  442. 'comment_status' => $comment_status,
  443. 'ping_status' => $ping_status,
  444. 'menu_order' => $menu_order,
  445. 'guid' => $post->guid,
  446. 'thumbnail' => $this->prepare_thumbnail( $post ),
  447. );
  448. /**
  449. * Filter sync arguments for a post. For backwards compatibility.
  450. *
  451. * @hook ep_post_sync_args
  452. * @param {array} $post_args Post arguments
  453. * @param {int} $post_id Post ID
  454. * @return {array} New arguments
  455. */
  456. $post_args = apply_filters( 'ep_post_sync_args', $post_args, $post_id );
  457. /**
  458. * Filter sync arguments for a post after meta preparation.
  459. *
  460. * @hook ep_post_sync_args_post_prepare_meta
  461. * @param {array} $post_args Post arguments
  462. * @param {int} $post_id Post ID
  463. * @return {array} New arguments
  464. */
  465. $post_args = apply_filters( 'ep_post_sync_args_post_prepare_meta', $post_args, $post_id );
  466. // Turn back on updated_postmeta hook
  467. add_action( 'updated_postmeta', [ $this->sync_manager, 'action_queue_meta_sync' ], 10, 4 );
  468. return $post_args;
  469. }
  470. /**
  471. * Prepare thumbnail to send to ES.
  472. *
  473. * @param WP_Post $post Post object.
  474. * @return array|null Thumbnail data.
  475. */
  476. public function prepare_thumbnail( $post ) {
  477. $attachment_id = get_post_thumbnail_id( $post );
  478. if ( ! $attachment_id ) {
  479. return null;
  480. }
  481. /**
  482. * Filters the image size to use when indexing the post thumbnail.
  483. *
  484. * Defaults to the `woocommerce_thumbnail` size if WooCommerce is in
  485. * use. Otherwise the `thumbnail` size is used.
  486. *
  487. * @hook ep_thumbnail_image_size
  488. * @since 4.0.0
  489. * @param {string|int[]} $image_size Image size. Can be any registered
  490. * image size name, or an array of
  491. * width and height values in pixels
  492. * (in that order).
  493. * @param {WP_Post} $post Post being indexed.
  494. * @return {array} Image size to pass to wp_get_attachment_image_src().
  495. */
  496. $image_size = apply_filters(
  497. 'ep_post_thumbnail_image_size',
  498. function_exists( 'WC' ) ? 'woocommerce_thumbnail' : 'thumbnail',
  499. $post
  500. );
  501. $image_src = wp_get_attachment_image_src( $attachment_id, $image_size );
  502. $image_alt = trim( wp_strip_all_tags( get_post_meta( $attachment_id, '_wp_attachment_image_alt', true ) ) );
  503. if ( ! $image_src ) {
  504. return null;
  505. }
  506. return [
  507. 'ID' => $attachment_id,
  508. 'src' => $image_src[0],
  509. 'width' => $image_src[1],
  510. 'height' => $image_src[2],
  511. 'alt' => $image_alt,
  512. ];
  513. }
  514. /**
  515. * Prepare date terms to send to ES.
  516. *
  517. * @param null|string $date_to_prepare Post date
  518. * @since 0.1.4
  519. * @return array
  520. */
  521. public function prepare_date_terms( $date_to_prepare ) {
  522. $terms_to_prepare = [
  523. 'year' => 'Y',
  524. 'month' => 'm',
  525. 'week' => 'W',
  526. 'dayofyear' => 'z',
  527. 'day' => 'd',
  528. 'dayofweek' => 'w',
  529. 'dayofweek_iso' => 'N',
  530. 'hour' => 'H',
  531. 'minute' => 'i',
  532. 'second' => 's',
  533. 'm' => 'Ym', // yearmonth
  534. ];
  535. // Combine all the date term formats and perform one single call to date_i18n() for performance.
  536. $date_format = implode( '||', array_values( $terms_to_prepare ) );
  537. $combined_dates = explode( '||', date_i18n( $date_format, strtotime( (string) $date_to_prepare ) ) );
  538. // Then split up the results for individual indexing.
  539. $date_terms = [];
  540. foreach ( $terms_to_prepare as $term_name => $date_format ) {
  541. $index_in_combined_format = array_search( $term_name, array_keys( $terms_to_prepare ), true );
  542. $date_terms[ $term_name ] = (int) $combined_dates[ $index_in_combined_format ];
  543. }
  544. return $date_terms;
  545. }
  546. /**
  547. * Get an array of taxonomies that are indexable for the given post
  548. *
  549. * @since 4.0.0
  550. * @param WP_Post $post Post object
  551. * @return array Array of WP_Taxonomy objects that should be indexed
  552. */
  553. public function get_indexable_post_taxonomies( $post ) {
  554. $taxonomies = get_object_taxonomies( $post->post_type, 'objects' );
  555. $selected_taxonomies = [];
  556. foreach ( $taxonomies as $taxonomy ) {
  557. if ( $taxonomy->public || $taxonomy->publicly_queryable ) {
  558. $selected_taxonomies[] = $taxonomy;
  559. }
  560. }
  561. /**
  562. * Filter taxonomies to be synced with post
  563. *
  564. * @hook ep_sync_taxonomies
  565. * @param {array} $selected_taxonomies Selected taxonomies
  566. * @param {WP_Post} Post object
  567. * @return {array} New taxonomies
  568. */
  569. $selected_taxonomies = (array) apply_filters( 'ep_sync_taxonomies', $selected_taxonomies, $post );
  570. // Important we validate here to ensure there are no invalid taxonomy values returned from the filter, as just one would cause wp_get_object_terms() to fail.
  571. $validated_taxonomies = [];
  572. foreach ( $selected_taxonomies as $selected_taxonomy ) {
  573. // If we get a taxonomy name, we need to convert it to taxonomy object
  574. if ( ! is_object( $selected_taxonomy ) && taxonomy_exists( (string) $selected_taxonomy ) ) {
  575. $selected_taxonomy = get_taxonomy( $selected_taxonomy );
  576. }
  577. // We check if the $taxonomy object has a valid name property. Backward compatibility since WP_Taxonomy introduced in WP 4.7
  578. if ( ! is_a( $selected_taxonomy, '\WP_Taxonomy' ) || ! property_exists( $selected_taxonomy, 'name' ) || ! taxonomy_exists( $selected_taxonomy->name ) ) {
  579. continue;
  580. }
  581. $validated_taxonomies[] = $selected_taxonomy;
  582. }
  583. return $validated_taxonomies;
  584. }
  585. /**
  586. * Prepare terms to send to ES.
  587. *
  588. * @param WP_Post $post Post object
  589. * @since 0.1.0
  590. * @return array
  591. */
  592. private function prepare_terms( $post ) {
  593. $selected_taxonomies = $this->get_indexable_post_taxonomies( $post );
  594. if ( empty( $selected_taxonomies ) ) {
  595. return [];
  596. }
  597. $terms = [];
  598. /**
  599. * Filter to allow child terms to be indexed
  600. *
  601. * @hook ep_sync_terms_allow_hierarchy
  602. * @param {bool} $allow True means allow
  603. * @return {bool} New value
  604. */
  605. $allow_hierarchy = apply_filters( 'ep_sync_terms_allow_hierarchy', true );
  606. foreach ( $selected_taxonomies as $taxonomy ) {
  607. $object_terms = get_the_terms( $post->ID, $taxonomy->name );
  608. if ( ! $object_terms || is_wp_error( $object_terms ) ) {
  609. continue;
  610. }
  611. $terms_dic = [];
  612. foreach ( $object_terms as $term ) {
  613. if ( ! isset( $terms_dic[ $term->term_id ] ) ) {
  614. $terms_dic[ $term->term_id ] = $this->get_formatted_term( $term, $post->ID );
  615. if ( $allow_hierarchy ) {
  616. $terms_dic = $this->get_parent_terms( $terms_dic, $term, $taxonomy->name, $post->ID );
  617. }
  618. }
  619. }
  620. $terms[ $taxonomy->name ] = array_values( $terms_dic );
  621. }
  622. return $terms;
  623. }
  624. /**
  625. * Recursively get all the ancestor terms of the given term
  626. *
  627. * @param array $terms Terms array
  628. * @param WP_Term $term Current term
  629. * @param string $tax_name Taxonomy
  630. * @param int $object_id Post ID
  631. *
  632. * @return array
  633. */
  634. private function get_parent_terms( $terms, $term, $tax_name, $object_id ) {
  635. $parent_term = get_term( $term->parent, $tax_name );
  636. if ( ! $parent_term || is_wp_error( $parent_term ) ) {
  637. return $terms;
  638. }
  639. if ( ! isset( $terms[ $parent_term->term_id ] ) ) {
  640. $terms[ $parent_term->term_id ] = $this->get_formatted_term( $parent_term, $object_id );
  641. }
  642. return $this->get_parent_terms( $terms, $parent_term, $tax_name, $object_id );
  643. }
  644. /**
  645. * Given a term, format it to be appended to the post ES document.
  646. *
  647. * @since 4.5.0
  648. * @param \WP_Term $term Term to be formatted
  649. * @param int $post_id The post ID
  650. * @return array
  651. */
  652. private function get_formatted_term( \WP_Term $term, int $post_id ): array {
  653. $formatted_term = [
  654. 'term_id' => $term->term_id,
  655. 'slug' => $term->slug,
  656. 'name' => $term->name,
  657. 'parent' => $term->parent,
  658. 'term_taxonomy_id' => $term->term_taxonomy_id,
  659. 'term_order' => (int) $this->get_term_order( $term->term_taxonomy_id, $post_id ),
  660. ];
  661. /**
  662. * As the name implies, the facet attribute is used to list all terms in facets.
  663. * As in facets, the term_order associated with a post does not matter, we set it as 0 here.
  664. * Note that this is set as 0 instead of simply removed to keep backward compatibility.
  665. */
  666. $term_facet = $formatted_term;
  667. $term_facet['term_order'] = 0;
  668. $formatted_term['facet'] = wp_json_encode( $term_facet );
  669. return $formatted_term;
  670. }
  671. /**
  672. * Retrieves term order for the object/term_taxonomy_id combination
  673. *
  674. * @param int $term_taxonomy_id Term Taxonomy ID
  675. * @param int $object_id Post ID
  676. *
  677. * @return int Term Order
  678. */
  679. protected function get_term_order( $term_taxonomy_id, $object_id ) {
  680. global $wpdb;
  681. $cache_key = "{$object_id}_term_order";
  682. $term_orders = wp_cache_get( $cache_key );
  683. if ( false === $term_orders ) {
  684. $results = $wpdb->get_results( // phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery
  685. $wpdb->prepare(
  686. "SELECT term_taxonomy_id, term_order from $wpdb->term_relationships where object_id=%d;",
  687. $object_id
  688. ),
  689. ARRAY_A
  690. );
  691. $term_orders = [];
  692. foreach ( $results as $result ) {
  693. $term_orders[ $result['term_taxonomy_id'] ] = $result['term_order'];
  694. }
  695. wp_cache_set( $cache_key, $term_orders );
  696. }
  697. return isset( $term_orders[ $term_taxonomy_id ] ) ? (int) $term_orders[ $term_taxonomy_id ] : 0;
  698. }
  699. /**
  700. * Checks if meta key is allowed
  701. *
  702. * @param string $meta_key meta key to check
  703. * @param WP_Post $post Post object
  704. * @since 4.3.0
  705. * @return boolean
  706. */
  707. public function is_meta_allowed( $meta_key, $post ) {
  708. $test_metas = [
  709. $meta_key => true,
  710. ];
  711. $filtered_test_metas = $this->filter_allowed_metas( $test_metas, $post );
  712. return array_key_exists( $meta_key, $filtered_test_metas );
  713. }
  714. /**
  715. * Filter post meta to only the allowed ones to be send to ES
  716. *
  717. * @param array $metas Key => value pairs of post meta
  718. * @param WP_Post $post Post object
  719. * @since 4.3.0
  720. * @return array
  721. */
  722. public function filter_allowed_metas( $metas, $post ) {
  723. $filtered_metas = [];
  724. $search = \ElasticPress\Features::factory()->get_registered_feature( 'search' );
  725. if ( $search && ! empty( $search->weighting ) && 'manual' === $search->weighting->get_meta_mode() ) {
  726. $filtered_metas = $this->filter_allowed_metas_manual( $metas, $post );
  727. } else {
  728. $filtered_metas = $this->filter_allowed_metas_auto( $metas, $post );
  729. }
  730. return $filtered_metas;
  731. }
  732. /**
  733. * Prepare post meta to send to ES
  734. *
  735. * @param WP_Post $post Post object
  736. * @since 0.1.0
  737. * @return array
  738. */
  739. public function prepare_meta( $post ) {
  740. /**
  741. * Filter pre-prepare meta for a post
  742. *
  743. * @hook ep_prepare_meta_data
  744. * @param {array} $meta Meta data
  745. * @param {WP_Post} $post Post object
  746. * @return {array} New meta
  747. */
  748. $meta = apply_filters( 'ep_prepare_meta_data', (array) get_post_meta( $post->ID ), $post );
  749. if ( empty( $meta ) ) {
  750. /**
  751. * Filter final list of prepared meta.
  752. *
  753. * @hook ep_prepared_post_meta
  754. * @param {array} $prepared_meta Prepared meta
  755. * @param {WP_Post} $post Post object
  756. * @since 3.4
  757. * @return {array} Prepared meta
  758. */
  759. return apply_filters( 'ep_prepared_post_meta', [], $post );
  760. }
  761. $filtered_metas = $this->filter_allowed_metas( $meta, $post );
  762. $prepared_meta = [];
  763. foreach ( $filtered_metas as $key => $value ) {
  764. if ( ! empty( $key ) ) {
  765. $prepared_meta[ $key ] = maybe_unserialize( $value );
  766. }
  767. }
  768. /**
  769. * Filter final list of prepared meta.
  770. *
  771. * @hook ep_prepared_post_meta
  772. * @param {array} $prepared_meta Prepared meta
  773. * @param {WP_Post} $post Post object
  774. * @since 3.4
  775. * @return {array} Prepared meta
  776. */
  777. return apply_filters( 'ep_prepared_post_meta', $prepared_meta, $post );
  778. }
  779. /**
  780. * Format WP query args for ES
  781. *
  782. * @param array $args WP_Query arguments.
  783. * @param WP_Query $wp_query WP_Query object
  784. * @since 0.9.0
  785. * @return array
  786. */
  787. public function format_args( $args, $wp_query ) {
  788. $args = $this->sanitize_wp_query_args( $args );
  789. $formatted_args = [
  790. 'from' => $this->parse_from( $args ),
  791. 'size' => $this->parse_size( $args ),
  792. ];
  793. $filters = $this->parse_filters( $args, $wp_query );
  794. if ( ! empty( $filters ) ) {
  795. $formatted_args['post_filter'] = $filters;
  796. }
  797. $formatted_args = $this->maybe_set_search_fields( $formatted_args, $args );
  798. $formatted_args = $this->maybe_set_fields( $formatted_args, $args );
  799. $formatted_args = $this->maybe_orderby( $formatted_args, $args );
  800. $formatted_args = $this->maybe_add_sticky_posts( $formatted_args, $args );
  801. $formatted_args = $this->maybe_set_aggs( $formatted_args, $args, $filters );
  802. /**
  803. * Filter formatted Elasticsearch query (entire query)
  804. *
  805. * @hook ep_formatted_args
  806. * @param {array} $formatted_args Formatted Elasticsearch query
  807. * @param {array} $args WP_Query variables
  808. * @param {object} $wp_query WP_Query object
  809. * @return {array} New query
  810. */
  811. $formatted_args = apply_filters( 'ep_formatted_args', $formatted_args, $args, $wp_query );
  812. /**
  813. * Filter formatted Elasticsearch post query (entire query)
  814. *
  815. * @hook ep_post_formatted_args
  816. * @param {array} $formatted_args Formatted Elasticsearch query
  817. * @param {array} $args WP_Query variables
  818. * @param {object} $wp_query WP_Query object
  819. * @return {array} New query
  820. */
  821. $formatted_args = apply_filters( 'ep_post_formatted_args', $formatted_args, $args, $wp_query );
  822. return $formatted_args;
  823. }
  824. /**
  825. * Adjust the fuzziness parameter if needed.
  826. *
  827. * If using fields with type `long`, queries should not have a fuzziness parameter.
  828. *
  829. * @param array $query Current query
  830. * @param array $query_vars Query variables
  831. * @param string $search_text Search text
  832. * @param array $search_fields Search fields
  833. * @return array New query
  834. */
  835. public function adjust_query_fuzziness( $query, $query_vars, $search_text, $search_fields ) {
  836. if ( empty( array_intersect( $search_fields, [ 'ID', 'post_id', 'post_parent' ] ) ) ) {
  837. return $query;
  838. }
  839. if ( ! isset( $query['bool'] ) || ! isset( $query['bool']['should'] ) ) {
  840. return $query;
  841. }
  842. foreach ( $query['bool']['should'] as &$clause ) {
  843. if ( ! isset( $clause['multi_match'] ) ) {
  844. continue;
  845. }
  846. if ( isset( $clause['multi_match']['fuzziness'] ) ) {
  847. unset( $clause['multi_match']['fuzziness'] );
  848. }
  849. }
  850. return $query;
  851. }
  852. /**
  853. * Parse and build out our tax query.
  854. *
  855. * @access protected
  856. *
  857. * @param array $query Tax query
  858. * @return array
  859. */
  860. protected function parse_tax_query( $query ) {
  861. $tax_query = [
  862. 'tax_filter' => [],
  863. 'tax_must_not_filter' => [],
  864. ];
  865. $relation = '';
  866. foreach ( $query as $tax_queries ) {
  867. // If we have a nested tax query, recurse through that
  868. if ( is_array( $tax_queries ) && empty( $tax_queries['taxonomy'] ) ) {
  869. $result = $this->parse_tax_query( $tax_queries );
  870. $relation = ( ! empty( $tax_queries['relation'] ) ) ? strtolower( $tax_queries['relation'] ) : 'and';
  871. $filter_type = 'and' === $relation ? 'must' : 'should';
  872. // Set the proper filter type and must_not filter, as needed
  873. if ( ! empty( $result['tax_must_not_filter'] ) ) {
  874. $tax_query['tax_filter'][] = [
  875. 'bool' => [
  876. $filter_type => $result['tax_filter'],
  877. 'must_not' => $result['tax_must_not_filter'],
  878. ],
  879. ];
  880. } else {
  881. $tax_query['tax_filter'][] = [
  882. 'bool' => [
  883. $filter_type => $result['tax_filter'],
  884. ],
  885. ];
  886. }
  887. }
  888. // Parse each individual tax query part
  889. $single_tax_query = $tax_queries;
  890. if ( ! empty( $single_tax_query['taxonomy'] ) ) {
  891. $terms = isset( $single_tax_query['terms'] ) ? (array) $single_tax_query['terms'] : array();
  892. $field = $this->parse_tax_query_field( $single_tax_query['field'] );
  893. if ( 'slug' === $field ) {
  894. $terms = array_map( 'sanitize_title', $terms );
  895. }
  896. // Set up our terms object
  897. $terms_obj = array(
  898. 'terms.' . $single_tax_query['taxonomy'] . '.' . $field => array_values( array_filter( $terms ) ),
  899. );
  900. $operator = ( ! empty( $single_tax_query['operator'] ) ) ? strtolower( $single_tax_query['operator'] ) : 'in';
  901. switch ( $operator ) {
  902. case 'exists':
  903. /**
  904. * add support for "EXISTS" operator
  905. *
  906. * @since 2.5
  907. */
  908. $tax_query['tax_filter'][]['bool'] = array(
  909. 'must' => array(
  910. array(
  911. 'exists' => array(
  912. 'field' => key( $terms_obj ),
  913. ),
  914. ),
  915. ),
  916. );
  917. break;
  918. case 'not exists':
  919. /**
  920. * add support for "NOT EXISTS" operator
  921. *
  922. * @since 2.5
  923. */
  924. $tax_query['tax_filter'][]['bool'] = array(
  925. 'must_not' => array(
  926. array(
  927. 'exists' => array(
  928. 'field' => key( $terms_obj ),
  929. ),
  930. ),
  931. ),
  932. );
  933. break;
  934. case 'not in':
  935. /**
  936. * add support for "NOT IN" operator
  937. *
  938. * @since 2.1
  939. */
  940. // If "NOT IN" than it should filter as must_not
  941. $tax_query['tax_must_not_filter'][]['terms'] = $terms_obj;
  942. break;
  943. case 'and':
  944. /**
  945. * add support for "and" operator
  946. *
  947. * @since 2.4
  948. */
  949. $and_nest = array(
  950. 'bool' => array(
  951. 'must' => array(),
  952. ),
  953. );
  954. foreach ( $terms as $term ) {
  955. $and_nest['bool']['must'][] = array(
  956. 'terms' => array(
  957. 'terms.' . $single_tax_query['taxonomy'] . '.' . $field => (array) $term,
  958. ),
  959. );
  960. }
  961. $tax_query['tax_filter'][] = $and_nest;
  962. break;
  963. case 'in':
  964. default:
  965. /**
  966. * Default to IN operator
  967. */
  968. // Add the tax query filter
  969. $tax_query['tax_filter'][]['terms'] = $terms_obj;
  970. break;
  971. }
  972. }
  973. }
  974. return $tax_query;
  975. }
  976. /**
  977. * Parse an 'order' query variable and cast it to ASC or DESC as necessary.
  978. *
  979. * @since 1.1
  980. * @access protected
  981. *
  982. * @param string $order The 'order' query variable.
  983. * @return string The sanitized 'order' query variable.
  984. */
  985. protected function parse_order( $order ) {
  986. // Core will always set sort order to DESC for any invalid value,
  987. // so we can't do any automated testing of this function.
  988. // @codeCoverageIgnoreStart
  989. if ( ! is_string( $order ) || empty( $order ) ) {
  990. return 'desc';
  991. }
  992. // @codeCoverageIgnoreEnd
  993. if ( 'ASC' === strtoupper( $order ) ) {
  994. return 'asc';
  995. } else {
  996. return 'desc';
  997. }
  998. }
  999. /**
  1000. * Convert the alias to a properly-prefixed sort value.
  1001. *
  1002. * @since 1.1
  1003. * @access protected
  1004. *
  1005. * @param string $orderbys Alias or path for the field to order by.
  1006. * @param string $default_order Default order direction
  1007. * @param array $args Query args
  1008. * @return array
  1009. */
  1010. protected function parse_orderby( $orderbys, $default_order, $args ) {
  1011. $orderbys = $this->get_orderby_array( $orderbys );
  1012. $from_to = [
  1013. 'relevance' => '_score',
  1014. 'date' => 'post_date',
  1015. 'type' => 'post_type.raw',
  1016. 'modified' => 'post_modified',
  1017. 'name' => 'post_name.raw',
  1018. 'title' => 'post_title.sortable',
  1019. ];
  1020. $sort = [];
  1021. foreach ( $orderbys as $key => $value ) {
  1022. if ( is_string( $key ) ) {
  1023. $orderby_clause = $key;
  1024. $order = $value;
  1025. } else {
  1026. $orderby_clause = $value;
  1027. $order = $default_order;
  1028. }
  1029. if ( empty( $orderby_clause ) || 'rand' === $orderby_clause ) {
  1030. continue;
  1031. }
  1032. /**
  1033. * If `orderby` is 'none', WordPress will let the database decide on what should be used to order.
  1034. * It will use the primary key ASC.
  1035. */
  1036. if ( 'none' === $orderby_clause ) {
  1037. $orderby_clause = 'ID';
  1038. $order = 'asc';
  1039. }
  1040. if ( ! empty( $from_to[ $orderby_clause ] ) ) {
  1041. $orderby_clause = $from_to[ $orderby_clause ];
  1042. } else {
  1043. $orderby_clause = $this->parse_orderby_meta_fields( $orderby_clause, $args );
  1044. }
  1045. $sort[] = array(
  1046. $orderby_clause => array(
  1047. 'order' => $order,
  1048. ),
  1049. );
  1050. }
  1051. return $sort;
  1052. }
  1053. /**
  1054. * Try to parse orderby meta fields
  1055. *
  1056. * @since 4.6.0
  1057. * @param string $orderby_clause Current orderby value
  1058. * @param array $args Query args
  1059. * @return string New orderby value
  1060. */
  1061. protected function parse_orderby_meta_fields( $orderby_clause, $args ) {
  1062. global $wpdb;
  1063. $from_to_metatypes = [
  1064. 'num' => 'long',
  1065. 'numeric' => 'long',
  1066. 'binary' => 'value.sortable',
  1067. 'char' => 'value.sortable',
  1068. 'date' => 'date',
  1069. 'datetime' => 'datetime',
  1070. 'decimal' => 'double',
  1071. 'signed' => 'long',
  1072. 'time' => 'time',
  1073. 'unsigned' => 'long',
  1074. ];
  1075. // Code is targeting Elasticsearch directly
  1076. if ( preg_match( '/^meta\.(.*?)\.(.*)/', $orderby_clause, $match_meta ) ) {
  1077. return $orderby_clause;
  1078. }
  1079. // WordPress meta_value_* compatibility
  1080. if ( preg_match( '/^meta_value_?(.*)/', $orderby_clause, $match_type ) ) {
  1081. $meta_type = $from_to_metatypes[ strtolower( $match_type[1] ) ] ?? 'value.sortable';
  1082. }
  1083. if ( ! empty( $args['meta_key'] ) ) {
  1084. $meta_field = $args['meta_key'];
  1085. }
  1086. // Already have everything needed
  1087. if ( isset( $meta_type ) && isset( $meta_field ) ) {
  1088. return "meta.{$meta_field}.{$meta_type}";
  1089. }
  1090. // Don't have any other ways to guess
  1091. if ( empty( $args['meta_query'] ) ) {
  1092. return $orderby_clause;
  1093. }
  1094. $meta_query = new \WP_Meta_Query( $args['meta_query'] );
  1095. // Calling get_sql() to populate the WP_Meta_Query->clauses attribute
  1096. $meta_query->get_sql( 'post', $wpdb->posts, 'ID' );
  1097. $clauses = $meta_query->get_clauses();
  1098. // If it refers to a named meta_query clause
  1099. if ( ! empty( $clauses[ $orderby_clause ] ) ) {
  1100. $meta_field = $clauses[ $orderby_clause ]['key'];
  1101. $clause_meta_type = strtolower( $clauses[ $orderby_clause ]['type'] ?? $clauses[ $orderby_clause ]['cast'] );
  1102. } else {
  1103. /**
  1104. * At this point we:
  1105. * 1. Try to find the meta key in any meta_query clause and use the type WP found
  1106. * 2. If ordering by `meta_value*`, use the first meta_query clause
  1107. * 3. Give up and use the orderby clause as is (code could be capturing it later on)
  1108. */
  1109. $meta_keys_and_types = wp_list_pluck( $clauses, 'cast', 'key' );
  1110. if ( isset( $meta_keys_and_types[ $orderby_clause ] ) ) {
  1111. $meta_field = $orderby_clause;
  1112. $clause_meta_type = strtolower( $meta_keys_and_types[ $orderby_clause ] ?? $meta_keys_and_types[ $orderby_clause ] );
  1113. } elseif ( isset( $meta_type ) ) {
  1114. $primary_clause = reset( $clauses );
  1115. $meta_field = $primary_clause['key'];
  1116. } else {
  1117. unset( $meta_type );
  1118. unset( $meta_field );
  1119. }
  1120. }
  1121. if ( ! isset( $meta_type ) && isset( $clause_meta_type ) ) {
  1122. $meta_type = $from_to_metatypes[ $clause_meta_type ] ?? 'value.sortable';
  1123. }
  1124. if ( isset( $meta_type ) && isset( $meta_field ) ) {
  1125. $orderby_clause = "meta.{$meta_field}.{$meta_type}";
  1126. }
  1127. return $orderby_clause;
  1128. }
  1129. /**
  1130. * Get Order by args Array
  1131. *
  1132. * @param string|array $orderbys Order by string or array
  1133. * @since 2.1
  1134. * @return array
  1135. */
  1136. protected function get_orderby_array( $orderbys ) {
  1137. if ( ! is_array( $orderbys ) ) {
  1138. $orderbys = explode( ' ', $orderbys );
  1139. }
  1140. return $orderbys;
  1141. }
  1142. /**
  1143. * Given a mapping content, try to determine the version used.
  1144. *
  1145. * @since 3.6.3
  1146. *
  1147. * @param array $mapping Mapping content.
  1148. * @param string $index Index name
  1149. * @return string Version of the mapping being used.
  1150. */
  1151. protected function determine_mapping_version_based_on_existing( $mapping, $index ) {
  1152. if ( isset( $mapping[ $index ]['mappings']['post']['_meta']['mapping_version'] ) ) {
  1153. return $mapping[ $index ]['mappings']['post']['_meta']['mapping_version'];
  1154. }
  1155. if ( isset( $mapping[ $index ]['mappings']['_meta']['mapping_version'] ) ) {
  1156. return $mapping[ $index ]['mappings']['_meta']['mapping_version'];
  1157. }
  1158. /**
  1159. * Check for 7-0 mapping.
  1160. * If mapping has a `post` type, it can't be ES 7, as mapping types were removed in that release.
  1161. *
  1162. * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/removal-of-types.html
  1163. */
  1164. if ( ! isset( $mapping[ $index ]['mappings']['post'] ) ) {
  1165. return '7-0.php';
  1166. }
  1167. $post_mapping = $mapping[ $index ]['mappings']['post'];
  1168. /**
  1169. * Starting at this point, our tests rely on the post_title.fields.sortable field.
  1170. * As this field is present in all our mappings, if this field is not present in
  1171. * the mapping, this is a custom mapping.
  1172. *
  1173. * To have this code working with custom mappings, use the `ep_post_mapping_version_determined` filter.
  1174. */
  1175. if ( ! isset( $post_mapping['properties']['post_title']['fields']['sortable'] ) ) {
  1176. return 'unknown';
  1177. }
  1178. $post_title_sortable = $post_mapping['properties']['post_title']['fields']['sortable'];
  1179. /**
  1180. * Check for 5-2 mapping.
  1181. * Normalizers on keyword fields were only made available in ES 5.2
  1182. *
  1183. * @see https://www.elastic.co/guide/en/elasticsearch/reference/5.2/release-notes-5.2.0.html
  1184. */
  1185. if ( isset( $post_title_sortable['normalizer'] ) ) {
  1186. return '5-2.php';
  1187. }
  1188. return 'unknown';
  1189. }
  1190. /**
  1191. * Given ES args, add aggregations to it.
  1192. *
  1193. * @since 4.1.0
  1194. * @param array $formatted_args Formatted Elasticsearch query
  1195. * @param array $agg Aggregation data.
  1196. * @param boolean $use_filters Whether filters should be used or not.
  1197. * @param array $filter Filters defined so far.
  1198. * @return array Formatted Elasticsearch query with the aggregation added.
  1199. */
  1200. protected function apply_aggregations( $formatted_args, $agg, $use_filters, $filter ) {
  1201. if ( empty( $agg['aggs'] ) ) {
  1202. return $formatted_args;
  1203. }
  1204. // Add a name to the aggregation if it was passed through
  1205. $agg_name = ( ! empty( $agg['name'] ) ) ? $agg['name'] : 'aggregation_name';
  1206. // Add/use the filter if warranted
  1207. if ( isset( $agg['use-filter'] ) && false !== $agg['use-filter'] && $use_filters ) {
  1208. // If a filter is being used, use it on the aggregation as well to receive relevant information to the query
  1209. $formatted_args['aggs'][ $agg_name ]['filter'] = $filter;
  1210. $formatted_args['aggs'][ $agg_name ]['aggs'] = $agg['aggs'];
  1211. } else {
  1212. $formatted_args['aggs'][ $agg_name ] = $agg['aggs'];
  1213. }
  1214. return $formatted_args;
  1215. }
  1216. /**
  1217. * Get the search algorithm that should be used.
  1218. *
  1219. * @since 4.3.0
  1220. * @param string $search_text Search term(s)
  1221. * @param array $search_fields Search fields
  1222. * @param array $query_vars Query vars
  1223. * @return SearchAlgorithm Instance of search algorithm to be used
  1224. */
  1225. public function get_search_algorithm( string $search_text, array $search_fields, array $query_vars ): \ElasticPress\SearchAlgorithm {
  1226. $search_algorithm_version_option = \ElasticPress\Utils\get_option( 'ep_search_algorithm_version', '4.0' );
  1227. /**
  1228. * Filter the algorithm version to be used.
  1229. *
  1230. * @since 3.5
  1231. * @hook ep_search_algorithm_version
  1232. * @param {string} $search_algorithm_version Algorithm version.
  1233. * @return {string} New algorithm version
  1234. */
  1235. $search_algorithm = apply_filters( 'ep_search_algorithm_version', $search_algorithm_version_option );
  1236. /**
  1237. * Filter the search algorithm to be used
  1238. *
  1239. * @hook ep_{$indexable_slug}_search_algorithm
  1240. * @since 4.3.0
  1241. * @param {string} $search_algorithm Slug of the search algorithm used as fallback
  1242. * @param {string} $search_term Search term
  1243. * @param {array} $search_fields Fields to be searched
  1244. * @param {array} $query_vars Query variables
  1245. * @return {string} New search algorithm slug
  1246. */
  1247. $search_algorithm = apply_filters( "ep_{$this->slug}_search_algorithm", $search_algorithm, $search_text, $search_fields, $query_vars );
  1248. return \ElasticPress\SearchAlgorithms::factory()->get( $search_algorithm );
  1249. }
  1250. /**
  1251. * Based on WP_Query arguments, parses the various filters that could be applied into the ES query.
  1252. *
  1253. * @since 4.4.0
  1254. * @param array $args WP_Query arguments
  1255. * @param WP_Query $query WP_Query object
  1256. * @return array
  1257. */
  1258. protected function parse_filters( $args, $query ) {
  1259. /**
  1260. * A note about the order of this array indices:
  1261. * As previously there was no way to access each part, some snippets might be accessing
  1262. * these filters by its usual numeric indices (see the array_values() call below.)
  1263. */
  1264. $filters = [
  1265. 'tax_query' => $this->parse_tax_queries( $args, $query ),
  1266. 'post_parent' => $this->parse_post_parent( $args ),
  1267. 'post_parent__in' => $this->parse_post_parent__in( $args ),
  1268. 'post_parent__not_in' => $this->parse_post_parent__not_in( $args ),
  1269. 'post__in' => $this->parse_post__in( $args ),
  1270. 'post_name__in' => $this->parse_post_name__in( $args ),
  1271. 'post__not_in' => $this->parse_post__not_in( $args ),
  1272. 'category__not_in' => $this->parse_category__not_in( $args ),
  1273. 'tag__not_in' => $this->parse_tag__not_in( $args ),
  1274. 'author' => $this->parse_author( $args ),
  1275. 'post_mime_type' => $this->parse_post_mime_type( $args ),
  1276. 'date' => $this->parse_date( $args ),
  1277. 'meta_query' => $this->parse_meta_queries( $args ),
  1278. 'post_type' => $this->parse_post_type( $args ),
  1279. 'post_status' => $this->parse_post_status( $args ),
  1280. ];
  1281. /**
  1282. * Filter the ES filters that will be applied to the ES query.
  1283. *
  1284. * Although each index of the `$filters` array contains the related WP Query argument,
  1285. * it will be removed before applied to the ES query.
  1286. *
  1287. * @hook ep_post_filters
  1288. * @param {array} Current filters
  1289. * @param {array} WP Query args
  1290. * @param {WP_Query} WP Query object
  1291. * @return {array} New filters
  1292. */
  1293. $filters = apply_filters( 'ep_post_filters', $filters, $args, $query );
  1294. $filters = array_values( array_filter( $filters ) );
  1295. if ( ! empty( $filters ) ) {
  1296. $filters = [
  1297. 'bool' => [
  1298. 'must' => $filters,
  1299. ],
  1300. ];
  1301. }
  1302. return $filters;
  1303. }
  1304. /**
  1305. * Sanitize WP_Query arguments to be used to create the ES query.
  1306. *
  1307. * Elasticsearch will error if a terms query contains empty items like an empty string.
  1308. *
  1309. * @since 4.4.0
  1310. * @param array $args WP_Query arguments
  1311. * @return array
  1312. */
  1313. protected function sanitize_wp_query_args( $args ) {
  1314. $keys_to_sanitize = [
  1315. 'author__in',
  1316. 'author__not_in',
  1317. 'category__and',
  1318. 'category__in',
  1319. 'category__not_in',
  1320. 'tag__and',
  1321. 'tag__in',
  1322. 'tag__not_in',
  1323. 'tag_slug__and',
  1324. 'tag_slug__in',
  1325. 'post_parent__in',
  1326. 'post_parent__not_in',
  1327. 'post__in',
  1328. 'post__not_in',
  1329. 'post_name__in',
  1330. ];
  1331. foreach ( $keys_to_sanitize as $key ) {
  1332. if ( ! isset( $args[ $key ] ) ) {
  1333. continue;
  1334. }
  1335. $args[ $key ] = array_filter( (array) $args[ $key ] );
  1336. }
  1337. return $args;
  1338. }
  1339. /**
  1340. * Parse the `from` clause of the ES Query.
  1341. *
  1342. * @since 4.4.0
  1343. * @param array $args WP_Query arguments
  1344. * @return int
  1345. */
  1346. protected function parse_from( $args ) {
  1347. $from = 0;
  1348. if ( isset( $args['offset'] ) ) {
  1349. $from = (int) $args['offset'];
  1350. }
  1351. if ( isset( $args['paged'] ) && $args['paged'] > 1 ) {
  1352. $from = $args['posts_per_page'] * ( $args['paged'] - 1 );
  1353. }
  1354. /**
  1355. * Fix negative offset. This happens, for example, on hierarchical post types.
  1356. *
  1357. * Ref: https://github.com/10up/ElasticPress/issues/2480
  1358. */
  1359. if ( $from < 0 ) {
  1360. $from = 0;
  1361. }
  1362. return $from;
  1363. }
  1364. /**
  1365. * Parse the `size` clause of the ES Query.
  1366. *
  1367. * @since 4.4.0
  1368. * @param array $args WP_Query arguments
  1369. * @return int
  1370. */
  1371. protected function parse_size( $args ) {
  1372. if ( empty( $args['posts_per_page'] ) ) {
  1373. return (int) get_option( 'posts_per_page' );
  1374. }
  1375. $posts_per_page = (int) $args['posts_per_page'];
  1376. // ES have a maximum size allowed so we have to convert "-1" to a maximum size.
  1377. if ( -1 === $posts_per_page ) {
  1378. /**
  1379. * Filter max result size if set to -1
  1380. *
  1381. * The request will return a HTTP 500 Internal Error if the size of the
  1382. * request is larger than the [index.max_result_window] parameter in ES.
  1383. * See the scroll api for a more efficient way to request large data sets.
  1384. *
  1385. * @hook ep_max_results_window
  1386. * @param {int} Max result window
  1387. * @return {int} New window
  1388. */
  1389. $posts_per_page = apply_filters( 'ep_max_results_window', 10000 );
  1390. }
  1391. return $posts_per_page;
  1392. }
  1393. /**
  1394. * Parse the order of results in the ES query. It could simply be a `sort` clause or a function score query if using RAND.
  1395. *
  1396. * @since 4.4.0
  1397. * @param array $formatted_args Formatted Elasticsearch query
  1398. * @param array $args WP_Query arguments
  1399. * @return array
  1400. */
  1401. protected function maybe_orderby( $formatted_args, $args ) {
  1402. /**
  1403. * Order and Orderby arguments
  1404. *
  1405. * Used for how Elasticsearch will sort results
  1406. *
  1407. * @since 1.1
  1408. */
  1409. // Set sort order, default is 'desc'.
  1410. if ( ! empty( $args['order'] ) ) {
  1411. $order = $this->parse_order( $args['order'] );
  1412. } else {
  1413. $order = 'desc';
  1414. }
  1415. // Default sort for non-searches to date.
  1416. if ( empty( $args['orderby'] ) && ( ! isset( $args['s'] ) || '' === $args['s'] ) ) {
  1417. /**
  1418. * Filter default post query order by
  1419. *
  1420. * @hook ep_set_default_sort
  1421. * @param {string} $sort Default sort
  1422. * @param {string $order Order direction
  1423. * @return {string} New default
  1424. */
  1425. $args['orderby'] = apply_filters( 'ep_set_default_sort', 'date', $order );
  1426. }
  1427. // Set sort type.
  1428. if ( ! empty( $args['orderby'] ) ) {
  1429. $formatted_args['sort'] = $this->parse_orderby( $args['orderby'], $order, $args );
  1430. } else {
  1431. // Default sort is to use the score (based on relevance).
  1432. $default_sort = array(
  1433. array(
  1434. '_score' => array(
  1435. 'order' => $order,
  1436. ),
  1437. ),
  1438. );
  1439. /**
  1440. * Filter the ES query order (`sort` clause)
  1441. *
  1442. * This filter is used in searches if `orderby` is not set in the WP_Query args.
  1443. * The default value is:
  1444. *
  1445. * $default_sort = array(
  1446. * array(
  1447. * '_score' => array(
  1448. * 'order' => $order,
  1449. * ),
  1450. * ),
  1451. * );
  1452. *
  1453. * @hook ep_set_sort
  1454. * @since 3.6.3
  1455. * @param {array} $sort Default sort.
  1456. * @param {string} $order Order direction
  1457. * @return {array} New default
  1458. */
  1459. $default_sort = apply_filters( 'ep_set_sort', $default_sort, $order );
  1460. $formatted_args['sort'] = $default_sort;
  1461. }
  1462. /**
  1463. * Order by 'rand' support
  1464. *
  1465. * Ref: https://github.com/elastic/elasticsearch/issues/1170
  1466. */
  1467. if ( ! empty( $args['orderby'] ) ) {
  1468. $orderbys = $this->get_orderby_array( $args['orderby'] );
  1469. if ( in_array( 'rand', $orderbys, true ) ) {
  1470. $formatted_args_query = $formatted_args['query'];
  1471. $formatted_args['query'] = [];
  1472. $formatted_args['query']['function_score']['query'] = $formatted_args_query;
  1473. $formatted_args['query']['function_score']['random_score'] = (object) [];
  1474. }
  1475. }
  1476. return $formatted_args;
  1477. }
  1478. /**
  1479. * Parse all taxonomy queries.
  1480. *
  1481. * Although the name may be misleading, it handles the `tax_query` argument. There is a `parse_tax_query` that handles each "small" query.
  1482. *
  1483. * @since 4.4.0
  1484. * @param array $args WP_Query arguments
  1485. * @param WP_Query $query WP_Query object
  1486. * @return array
  1487. */
  1488. protected function parse_tax_queries( $args, $query ) {
  1489. /**
  1490. * Tax Query support
  1491. *
  1492. * Support for the tax_query argument of WP_Query. Currently only provides support for the 'AND' relation
  1493. * between taxonomies. Field only supports slug, term_id, and name defaulting to term_id.
  1494. *
  1495. * @use field = slug
  1496. * terms array
  1497. * @since 0.9.1
  1498. */
  1499. if ( ! empty( $query->tax_query ) && ! empty( $query->tax_query->queries ) ) {
  1500. $args['tax_query'] = $query->tax_query->queries;
  1501. }
  1502. if ( empty( $args['tax_query'] ) ) {
  1503. return [];
  1504. }
  1505. // Main tax_query array for ES.
  1506. $es_tax_query = [];
  1507. $tax_queries = $this->parse_tax_query( $args['tax_query'] );
  1508. if ( ! empty( $tax_queries['tax_filter'] ) ) {
  1509. $relation = 'must';
  1510. if ( ! empty( $args['tax_query']['relation'] ) && 'or' === strtolower( $args['tax_query']['relation'] ) ) {
  1511. $relation = 'should';
  1512. }
  1513. $es_tax_query[ $relation ] = $tax_queries['tax_filter'];
  1514. }
  1515. if ( ! empty( $tax_queries['tax_must_not_filter'] ) ) {
  1516. $es_tax_query['must_not'] = $tax_queries['tax_must_not_filter'];
  1517. }
  1518. if ( ! empty( $es_tax_query ) ) {
  1519. return [ 'bool' => $es_tax_query ];
  1520. }
  1521. return [];
  1522. }
  1523. /**
  1524. * Parse the `post_parent` WP Query arg and transform it into an ES query clause.
  1525. *
  1526. * @since 4.4.0
  1527. * @param array $args WP_Query arguments
  1528. * @return array
  1529. */
  1530. protected function parse_post_parent( $args ) {
  1531. $has_post_parent = isset( $args['post_parent'] ) && ( in_array( $args['post_parent'], [ 0, '0' ], true ) || ! empty( $args['post_parent'] ) );
  1532. if ( ! $has_post_parent || 'any' === strtolower( $args['post_parent'] ) ) {
  1533. return [];
  1534. }
  1535. return [
  1536. 'bool' => [
  1537. 'must' => [
  1538. 'term' => [
  1539. 'post_parent' => (int) $args['post_parent'],
  1540. ],
  1541. ],
  1542. ],
  1543. ];
  1544. }
  1545. /**
  1546. * Parse the `post_parent__in` WP Query arg and transform it into an ES query clause.
  1547. *
  1548. * @since 4.5.0
  1549. * @param array $args WP_Query arguments
  1550. * @return array
  1551. */
  1552. protected function parse_post_parent__in( $args ) {
  1553. if ( empty( $args['post_parent__in'] ) ) {
  1554. return [];
  1555. }
  1556. return [
  1557. 'bool' => [
  1558. 'must' => [
  1559. 'terms' => [
  1560. 'post_parent' => array_values( (array) $args['post_parent__in'] ),
  1561. ],
  1562. ],
  1563. ],
  1564. ];
  1565. }
  1566. /**
  1567. * Parse the `post_parent__not_in` WP Query arg and transform it into an ES query clause.
  1568. *
  1569. * @since 4.5.0
  1570. * @param array $args WP_Query arguments
  1571. * @return array
  1572. */
  1573. protected function parse_post_parent__not_in( $args ) {
  1574. if ( empty( $args['post_parent__not_in'] ) ) {
  1575. return [];
  1576. }
  1577. return [
  1578. 'bool' => [
  1579. 'must_not' => [
  1580. 'terms' => [
  1581. 'post_parent' => array_values( (array) $args['post_parent__not_in'] ),
  1582. ],
  1583. ],
  1584. ],
  1585. ];
  1586. }
  1587. /**
  1588. * Parse the `post__in` WP Query arg and transform it into an ES query clause.
  1589. *
  1590. * @since 4.4.0
  1591. * @param array $args WP_Query arguments
  1592. * @return array
  1593. */
  1594. protected function parse_post__in( $args ) {
  1595. if ( empty( $args['post__in'] ) ) {
  1596. return [];
  1597. }
  1598. return [
  1599. 'bool' => [
  1600. 'must' => [
  1601. 'terms' => [
  1602. 'post_id' => array_values( (array) $args['post__in'] ),
  1603. ],
  1604. ],
  1605. ],
  1606. ];
  1607. }
  1608. /**
  1609. * Parse the `post_name__in` WP Query arg and transform it into an ES query clause.
  1610. *
  1611. * @since 4.4.0
  1612. * @param array $args WP_Query arguments
  1613. * @return array
  1614. */
  1615. protected function parse_post_name__in( $args ) {
  1616. if ( empty( $args['post_name__in'] ) ) {
  1617. return [];
  1618. }
  1619. return [
  1620. 'bool' => [
  1621. 'must' => [
  1622. 'terms' => [
  1623. 'post_name.raw' => array_values( (array) $args['post_name__in'] ),
  1624. ],
  1625. ],
  1626. ],
  1627. ];
  1628. }
  1629. /**
  1630. * Parse the `post__not_in` WP Query arg and transform it into an ES query clause.
  1631. *
  1632. * @since 4.4.0
  1633. * @param array $args WP_Query arguments
  1634. * @return array
  1635. */
  1636. protected function parse_post__not_in( $args ) {
  1637. if ( empty( $args['post__not_in'] ) ) {
  1638. return [];
  1639. }
  1640. return [
  1641. 'bool' => [
  1642. 'must_not' => [
  1643. 'terms' => [
  1644. 'post_id' => array_values( (array) $args['post__not_in'] ),
  1645. ],
  1646. ],
  1647. ],
  1648. ];
  1649. }
  1650. /**
  1651. * Parse the `category__not_in` WP Query arg and transform it into an ES query clause.
  1652. *
  1653. * @since 4.4.0
  1654. * @param array $args WP_Query arguments
  1655. * @return array
  1656. */
  1657. protected function parse_category__not_in( $args ) {
  1658. if ( empty( $args['category__not_in'] ) ) {
  1659. return [];
  1660. }
  1661. return [
  1662. 'bool' => [
  1663. 'must_not' => [
  1664. 'terms' => [
  1665. 'terms.category.term_id' => array_values( (array) $args['category__not_in'] ),
  1666. ],
  1667. ],
  1668. ],
  1669. ];
  1670. }
  1671. /**
  1672. * Parse the `tag__not_in` WP Query arg and transform it into an ES query clause.
  1673. *
  1674. * @since 4.4.0
  1675. * @param array $args WP_Query arguments
  1676. * @return array
  1677. */
  1678. protected function parse_tag__not_in( $args ) {
  1679. if ( empty( $args['tag__not_in'] ) ) {
  1680. return [];
  1681. }
  1682. return [
  1683. 'bool' => [
  1684. 'must_not' => [
  1685. 'terms' => [
  1686. 'terms.post_tag.term_id' => array_values( (array) $args['tag__not_in'] ),
  1687. ],
  1688. ],
  1689. ],
  1690. ];
  1691. }
  1692. /**
  1693. * Parse the various author-related WP Query args and transform them into ES query clauses.
  1694. *
  1695. * @since 4.4.0
  1696. * @param array $args WP_Query arguments
  1697. * @return array
  1698. */
  1699. protected function parse_author( $args ) {
  1700. if ( ! empty( $args['author'] ) ) {
  1701. return [
  1702. 'term' => [
  1703. 'post_author.id' => $args['author'],
  1704. ],
  1705. ];
  1706. }
  1707. if ( ! empty( $args['author_name'] ) ) {
  1708. // Since this was set to use the display name initially, there might be some code that used this feature.
  1709. // Let's ensure that any query vars coming in using author_name are in fact slugs.
  1710. // This was changed back in ticket #1622 to use the display name, so we removed the sanitize_user() call.
  1711. return [
  1712. 'term' => [
  1713. 'post_author.display_name' => $args['author_name'],
  1714. ],
  1715. ];
  1716. }
  1717. if ( ! empty( $args['author__in'] ) ) {
  1718. return [
  1719. 'bool' => [
  1720. 'must' => [
  1721. 'terms' => [
  1722. 'post_author.id' => array_values( (array) $args['author__in'] ),
  1723. ],
  1724. ],
  1725. ],
  1726. ];
  1727. }
  1728. if ( ! empty( $args['author__not_in'] ) ) {
  1729. return [
  1730. 'bool' => [
  1731. 'must_not' => [
  1732. 'terms' => [
  1733. 'post_author.id' => array_values( (array) $args['author__not_in'] ),
  1734. ],
  1735. ],
  1736. ],
  1737. ];
  1738. }
  1739. return [];
  1740. }
  /**
   * Parse the `post_mime_type` WP Query arg and transform it into an ES query clause.
   *
   * If we have an array, it becomes a `terms` filter: entries that already look like a
   * `type/subtype` MIME type (or are empty) are used as they are, every other entry is
   * expanded through `wp_match_mime_types()`.
   * If we have a string (like filtering images in the media screen, where the mime type is
   * just "image"), it becomes a `regexp` filter matching everything that starts with it.
   *
   * @since 4.4.0
   * @param array $args WP_Query arguments
   * @return array
   */
  protected function parse_post_mime_type( $args ) {
      if ( empty( $args['post_mime_type'] ) ) {
          return [];
      }

      if ( is_array( $args['post_mime_type'] ) ) {
          $args_post_mime_type = [];

          foreach ( $args['post_mime_type'] as $mime_type ) {
              /**
               * Check if it matches the MIME type pattern: type/subtype and
               * leave an empty string as posts, pages and CPTs don't have a MIME type
               */
              if ( preg_match( '/^[-._a-z0-9]+\/[-._a-z0-9]+$/i', $mime_type ) || empty( $mime_type ) ) {
                  $args_post_mime_type[] = $mime_type;
                  continue;
              }

              $filtered_mime_type_by_type = wp_match_mime_types( $mime_type, wp_get_mime_types() );

              // The result may have no entry for this type (e.g. nothing matched); merging a
              // missing entry would hand `null` to array_merge(), so fall back to an empty list.
              $args_post_mime_type = array_merge(
                  $args_post_mime_type,
                  (array) ( $filtered_mime_type_by_type[ $mime_type ] ?? [] )
              );
          }

          return [
              'terms' => [
                  'post_mime_type' => $args_post_mime_type,
              ],
          ];
      }

      if ( is_string( $args['post_mime_type'] ) ) {
          return [
              'regexp' => array(
                  'post_mime_type' => $args['post_mime_type'] . '.*',
              ),
          ];
      }

      return [];
  }
  /**
   * Parse the various date-related WP Query args and transform them into ES query clauses.
   *
   * The simple date arguments handled by `DateQuery::simple_es_date_filter()` take
   * precedence. Otherwise, the `date_query` argument is converted and the content of its
   * `and` key is returned. An empty array is returned when no date clause applies.
   *
   * @since 4.4.0
   * @param array $args WP_Query arguments
   * @return array
   */
  protected function parse_date( $args ) {
      $date_filter = DateQuery::simple_es_date_filter( $args );

      if ( ! empty( $date_filter ) ) {
          return $date_filter;
      }

      if ( ! empty( $args['date_query'] ) ) {
          $date_query  = new DateQuery( $args['date_query'] );
          $date_filter = $date_query->get_es_filter();

          if ( is_array( $date_filter ) && array_key_exists( 'and', $date_filter ) ) {
              return $date_filter['and'];
          }
      }

      // Always honor the documented array return type instead of implicitly returning null.
      return [];
  }
  /**
   * Parse all meta queries.
   *
   * Despite its name, this method handles the whole `meta_query` argument together with the
   * simple `meta_key`, `meta_value`, `meta_value_num` and `meta_compare` arguments. The
   * combined structure is passed to `build_meta_query()`, which handles each "small" query.
   *
   * @since 4.4.0
   * @param array $args WP_Query arguments
   * @return array
   */
  protected function parse_meta_queries( $args ) {
      /**
       * 'meta_query' arg support.
       *
       * Relation supports 'AND' and 'OR'. 'AND' is the default. For each individual query, the
       * following 'compare' values are supported: =, !=, EXISTS, NOT EXISTS. '=' is the default.
       *
       * @since 1.3
       */
      $sanitizer = new \WP_Meta_Query();
      $queries   = $sanitizer->sanitize_query( empty( $args['meta_query'] ) ? [] : $args['meta_query'] );

      /**
       * Todo: Support meta_type
       */

      /**
       * Support `meta_key`, `meta_value`, `meta_value_num`, and `meta_compare` query args
       */
      if ( ! empty( $args['meta_key'] ) ) {
          $simple_query = [
              'key' => $args['meta_key'],
          ];

          // `meta_value` wins over `meta_value_num`; empty strings are treated as not set.
          foreach ( [ 'meta_value', 'meta_value_num' ] as $value_arg ) {
              if ( isset( $args[ $value_arg ] ) && '' !== $args[ $value_arg ] ) {
                  $simple_query['value'] = $args[ $value_arg ];
                  break;
              }
          }

          if ( isset( $args['meta_compare'] ) ) {
              $simple_query['compare'] = $args['meta_compare'];
          }

          // Combine the simple query with an existing `meta_query`, if there is one.
          $queries = empty( $queries )
              ? [ $simple_query ]
              : [
                  'relation' => 'AND',
                  $simple_query,
                  $queries,
              ];
      }

      if ( empty( $queries ) ) {
          return [];
      }

      // get meta query filter
      $meta_filter = $this->build_meta_query( $queries );

      return empty( $meta_filter ) ? [] : $meta_filter;
  }
  /**
   * Parse the `post_type` WP Query arg and transform it into an ES query clause.
   *
   * Without a `post_type`, non-search queries are restricted to `post` and search queries
   * are not restricted at all. The literal `any` adds no restriction either.
   *
   * @since 4.4.0
   * @param array $args WP_Query arguments
   * @return array
   */
  protected function parse_post_type( $args ) {
      if ( empty( $args['post_type'] ) ) {
          // If not set default to post. If search and not set, default to "any".
          if ( ! empty( $args['s'] ) ) {
              return [];
          }

          return [
              'term' => [
                  'post_type.raw' => 'post',
              ],
          ];
      }

      // should NEVER be "any" but just in case
      if ( 'any' === $args['post_type'] ) {
          return [];
      }

      return [
          'terms' => [
              'post_type.raw' => array_values( (array) $args['post_type'] ),
          ],
      ];
  }
  /**
   * Parse the `post_status` WP Query arg and transform it into an ES query clause.
   *
   * An explicit status (string, comma-separated string or array) becomes a `term` filter
   * when it holds a single status and a `terms` filter otherwise. Without a status, the
   * public statuses are used, plus protected and private ones in the admin.
   *
   * @since 4.4.0
   * @param array $args WP_Query arguments
   * @return array
   */
  protected function parse_post_status( $args ) {
      /**
       * Like WP_Query in search context, if no post_status is specified we default to "any". To
       * be safe you should ALWAYS specify the post_status parameter UNLIKE with WP_Query.
       *
       * @since 2.1
       */
      if ( ! empty( $args['post_status'] ) ) {
          // should NEVER be "any" but just in case
          if ( 'any' === $args['post_status'] ) {
              return [];
          }

          $post_status = (array) ( is_string( $args['post_status'] ) ? explode( ',', $args['post_status'] ) : $args['post_status'] );

          // array_map() preserves keys, so re-index before reading the first element:
          // otherwise a single status stored under a non-zero key would be read as null.
          $post_status = array_values( array_map( 'trim', $post_status ) );

          if ( count( $post_status ) < 2 ) {
              return [
                  'term' => [
                      'post_status' => $post_status[0],
                  ],
              ];
          }

          return [
              'terms' => [
                  'post_status' => $post_status,
              ],
          ];
      }

      $statuses = get_post_stati( array( 'public' => true ) );

      if ( is_admin() ) {
          /**
           * In the admin we will add protected and private post statuses to the default query
           * per WP default behavior.
           */
          $statuses = array_merge(
              $statuses,
              get_post_stati(
                  array(
                      'protected'              => true,
                      'show_in_admin_all_list' => true,
                  )
              )
          );

          if ( is_user_logged_in() ) {
              $statuses = array_merge( $statuses, get_post_stati( array( 'private' => true ) ) );
          }
      }

      return [
          'terms' => [
              'post_status' => array_values( $statuses ),
          ],
      ];
  }
  /**
   * If in a search context set search fields, otherwise query everything.
   *
   * The list of fields comes from the `search_fields` argument (falling back to title,
   * excerpt and content) and goes through the `ep_search_fields` filter. With a search
   * term the query is built by the search algorithm; without one, a `match_all` query is
   * set when `ep_match_all` or `ep_integrate` is in use.
   *
   * @since 4.4.0
   * @param array $formatted_args Formatted Elasticsearch query
   * @param array $args           WP_Query arguments
   * @return array
   */
  protected function maybe_set_search_fields( $formatted_args, $args ) {
      $search_fields = array(
          'post_title',
          'post_excerpt',
          'post_content',
      );

      /**
       * Allow for search field specification
       *
       * @since 1.0
       */
      if ( ! empty( $args['search_fields'] ) ) {
          $requested_fields = $args['search_fields'];
          $expanded_fields  = [];

          // Nested groups are flattened into `<prefix><name><suffix>` field names.
          $groups = [
              'taxonomies' => [ 'terms.', '.name' ],
              'meta'       => [ 'meta.', '.value' ],
          ];

          foreach ( $groups as $group => list( $prefix, $suffix ) ) {
              if ( empty( $requested_fields[ $group ] ) ) {
                  continue;
              }

              foreach ( (array) $requested_fields[ $group ] as $name ) {
                  $expanded_fields[] = $prefix . $name . $suffix;
              }

              unset( $requested_fields[ $group ] );
          }

          // `author_name` is replaced by the author login field.
          $author_name_index = array_search( 'author_name', $requested_fields, true );

          if ( false !== $author_name_index ) {
              $expanded_fields[] = 'post_author.login';

              unset( $requested_fields[ $author_name_index ] );
          }

          $search_fields = array_merge( $requested_fields, $expanded_fields );
      }

      /**
       * Filter default post search fields
       *
       * If you are using the weighting engine, this filter should not be used.
       * Instead, you should use the ep_weighting_configuration_for_search filter.
       *
       * @hook ep_search_fields
       * @param  {array} $search_fields Default search fields
       * @param  {array} $args WP Query arguments
       * @return  {array} New defaults
       */
      $search_fields = apply_filters( 'ep_search_fields', $search_fields, $args );

      /**
       * We are using ep_integrate instead of ep_match_all. ep_match_all will be
       * supported for legacy code but may be deprecated and removed eventually.
       *
       * @since 1.3
       */
      if ( ! empty( $args['s'] ) ) {
          $search_text = $args['s'];

          add_filter( 'ep_post_formatted_args_query', [ $this, 'adjust_query_fuzziness' ], 100, 4 );

          $search_algorithm        = $this->get_search_algorithm( $search_text, $search_fields, $args );
          $formatted_args['query'] = $search_algorithm->get_query( 'post', $search_text, $search_fields, $args );

          return $formatted_args;
      }

      if ( ! empty( $args['ep_match_all'] ) || ! empty( $args['ep_integrate'] ) ) {
          $formatted_args['query']['match_all'] = array(
              'boost' => 1,
          );
      }

      return $formatted_args;
  }
  /**
   * If needed bring sticky posts and order them.
   *
   * When sticky posts exist, are enabled for the request, there is no search term and
   * `ignore_sticky_posts` is explicitly falsy, the query is wrapped in a `function_score`
   * query that adds weight to the sticky posts, and `_score` becomes the first sort clause.
   *
   * @since 4.4.0
   * @param array $formatted_args Formatted Elasticsearch query
   * @param array $args           WP_Query arguments
   * @return array
   */
  protected function maybe_add_sticky_posts( $formatted_args, $args ) {
      /**
       * Sticky posts support
       */

      // Check first if there's sticky posts and show them only in the front page
      $sticky_posts = get_option( 'sticky_posts' );
      $sticky_posts = ( is_array( $sticky_posts ) && empty( $sticky_posts ) ) ? false : $sticky_posts;

      /**
       * Filter whether to enable sticky posts for this request
       *
       * @hook ep_enable_sticky_posts
       *
       * @param {bool}  $allow          Allow sticky posts for this request
       * @param {array} $args           Query variables
       * @param {array} $formatted_args EP formatted args
       *
       * @return {bool} $allow
       */
      $enable_sticky_posts = apply_filters( 'ep_enable_sticky_posts', is_home(), $args, $formatted_args );

      // A missing `ignore_sticky_posts` argument keeps behaving as "do not add sticky posts",
      // but no longer triggers an undefined index warning.
      $ignore_sticky_posts = $args['ignore_sticky_posts'] ?? null;

      if ( false === $sticky_posts
          || ! $enable_sticky_posts
          || ! empty( $args['s'] )
          || ! in_array( $ignore_sticky_posts, array( 'false', 0, false ), true ) ) {
          return $formatted_args;
      }

      $new_sort = [
          [
              '_score' => [
                  'order' => 'desc',
              ],
          ],
      ];

      // Tolerate a query without any sort clause yet.
      $formatted_args['sort'] = array_merge( $new_sort, $formatted_args['sort'] ?? [] );

      $formatted_args_query = $formatted_args['query'];

      $formatted_args['query']                                = array();
      $formatted_args['query']['function_score']['query']     = $formatted_args_query;
      $formatted_args['query']['function_score']['functions'] = array(
          // add extra weight to sticky posts to show them on top
          (object) array(
              'filter' => array(
                  // Re-index the IDs so they are always serialized as a list.
                  'terms' => array( '_id' => array_values( (array) $sticky_posts ) ),
              ),
              'weight' => 20,
          ),
      );

      return $formatted_args;
  }
  /**
   * If needed set the `fields` ES query clause.
   *
   * The `fields` WP Query argument is translated into the `_source` includes of the ES
   * query: `ids` keeps only `post_id`, `id=>parent` keeps `post_id` and `post_parent`.
   * Any other value leaves the query untouched.
   *
   * @since 4.4.0
   * @param array $formatted_args Formatted Elasticsearch query
   * @param array $args           WP_Query arguments
   * @return array
   */
  protected function maybe_set_fields( $formatted_args, $args ) {
      $source_fields = [];

      /**
       * Support fields. An unset argument does not match any of the cases.
       */
      switch ( $args['fields'] ?? null ) {
          case 'ids':
              $source_fields = array( 'post_id' );
              break;

          case 'id=>parent':
              $source_fields = array( 'post_id', 'post_parent' );
              break;
      }

      if ( ! empty( $source_fields ) ) {
          $formatted_args['_source'] = array(
              'includes' => $source_fields,
          );
      }

      return $formatted_args;
  }
  /**
   * If needed set the `aggs` ES query clause.
   *
   * The `aggs` argument is either a single aggregation or, when all of its array keys
   * are integers, a list of aggregations. Each one is applied via `apply_aggregations()`.
   *
   * @since 4.4.0
   * @param array $formatted_args Formatted Elasticsearch query.
   * @param array $args           WP_Query arguments
   * @param array $filters        Filters to be applied to the ES query
   * @return array
   */
  protected function maybe_set_aggs( $formatted_args, $args, $filters ) {
      /**
       * Aggregations
       */
      if ( empty( $args['aggs'] ) || ! is_array( $args['aggs'] ) ) {
          return $formatted_args;
      }

      // Check if the array indexes are all numeric.
      $is_list_of_aggs = true;
      foreach ( array_keys( $args['aggs'] ) as $agg_key ) {
          if ( ! is_int( $agg_key ) ) {
              $is_list_of_aggs = false;
              break;
          }
      }

      // A single aggregation is handled as a list with one element.
      $aggs        = $is_list_of_aggs ? $args['aggs'] : [ $args['aggs'] ];
      $use_filters = ! empty( $filters );

      foreach ( $aggs as $agg ) {
          $formatted_args = $this->apply_aggregations( $formatted_args, $agg, $use_filters, $filters );
      }

      return $formatted_args;
  }
  /**
   * Parse tax query field value.
   *
   * Maps the `field` of a tax query to the name used in the ES query: `name` becomes
   * `name.raw`, `slug` and `term_taxonomy_id` are kept, anything else becomes `term_id`.
   *
   * @since 4.4.0
   * @param string $field Field name
   * @return string
   */
  protected function parse_tax_query_field( string $field ): string {
      switch ( $field ) {
          case 'name':
              return 'name.raw';

          case 'slug':
          case 'term_taxonomy_id':
              return $field;

          default:
              return 'term_id';
      }
  }
  /**
   * Filter a list of meta keys down to those chosen by the user or
   * allowed via a hook.
   *
   * This function is used when manual management of metadata fields is
   * enabled. This is the default behaviour as of 5.0.0 and controlled by the
   * `ep_meta_mode` filter.
   *
   * Nothing is returned for posts without a post type, nor for searchable post types
   * that have no weighting configuration.
   *
   * @param array   $metas Key => value pairs of post meta
   * @param WP_Post $post  Post object
   * @since 5.0.0
   * @return array
   */
  protected function filter_allowed_metas_manual( $metas, $post ) {
      $filtered_metas = [];

      $search_feature = \ElasticPress\Features::factory()->get_registered_feature( 'search' );

      if ( empty( $post->post_type ) ) {
          return $filtered_metas;
      }

      $weighting = $search_feature->weighting->get_weighting_configuration_with_defaults();

      // Look up the post type (not the feature object) in the list of searchable post types.
      $is_searchable = in_array( $post->post_type, $search_feature->get_searchable_post_types(), true );
      if ( empty( $weighting[ $post->post_type ] ) && $is_searchable ) {
          return $filtered_metas;
      }

      /** This filter is documented in includes/classes/Indexable/Post/Post.php */
      $allowed_protected_keys = apply_filters( 'ep_prepare_meta_allowed_protected_keys', [], $post );

      // Meta keys selected in the weighting configuration, stored there as `meta.<key>...` fields.
      $selected_keys = [];
      if ( ! empty( $weighting[ $post->post_type ] ) ) {
          $selected_keys = array_map(
              function ( $field ) {
                  if ( false === strpos( $field, 'meta.' ) ) {
                      return null;
                  }
                  $field_name_parts = explode( '.', $field );
                  return $field_name_parts[1];
              },
              array_keys( $weighting[ $post->post_type ] )
          );
          $selected_keys = array_filter( $selected_keys );
      }

      /**
       * Filter indexable meta keys for posts
       *
       * @hook ep_prepare_meta_allowed_keys
       * @param {array} $keys Allowed keys
       * @param {WP_Post} $post Post object
       * @since 5.0.0
       * @return {array} New keys
       */
      $allowed_keys = apply_filters( 'ep_prepare_meta_allowed_keys', array_merge( $allowed_protected_keys, $selected_keys ), $post );

      foreach ( $metas as $key => $value ) {
          if ( ! in_array( $key, $allowed_keys, true ) ) {
              continue;
          }

          $filtered_metas[ $key ] = $value;
      }

      return $filtered_metas;
  }
  /**
   * Filter a list of meta keys down to public keys or protected keys
   * allowed via a hook.
   *
   * This function is used to filter meta keys when ElasticPress is in
   * network mode or when the meta mode is set to `auto` via the
   * `ep_meta_mode` hook. This was the default behaviour prior to 5.0.0.
   *
   * @param array   $metas Key => value pairs of post meta
   * @param WP_Post $post  Post object
   * @since 5.0.0
   * @return array
   */
  protected function filter_allowed_metas_auto( $metas, $post ) {
      /**
       * Filter indexable protected meta keys for posts
       *
       * @hook ep_prepare_meta_allowed_protected_keys
       * @param  {array} $keys Allowed protected keys
       * @param  {WP_Post} $post Post object
       * @since  1.7
       * @return  {array} New keys
       */
      $protected_allow_list = apply_filters( 'ep_prepare_meta_allowed_protected_keys', [], $post );

      /**
       * Filter public keys to exclude from indexed post
       *
       * @hook ep_prepare_meta_excluded_public_keys
       * @param  {array} $keys Excluded protected keys
       * @param  {WP_Post} $post Post object
       * @since  1.7
       * @return  {array} New keys
       */
      $public_deny_list = apply_filters( 'ep_prepare_meta_excluded_public_keys', [], $post );

      $indexable_metas = [];

      foreach ( $metas as $key => $value ) {
          if ( is_protected_meta( $key ) ) {
              // Protected keys need to be explicitly allowed (`true` allows all of them).
              $is_indexable = true === $protected_allow_list || in_array( $key, $protected_allow_list, true );
          } else {
              // Public keys are indexed unless excluded (`true` excludes all of them).
              $is_indexable = true !== $public_deny_list && ! in_array( $key, $public_deny_list, true );
          }

          /**
           * Filter force whitelisting a meta key
           *
           * @hook ep_prepare_meta_whitelist_key
           * @param  {bool} $whitelist True to whitelist key
           * @param  {string} $key Meta key
           * @param  {WP_Post} $post Post object
           * @return  {bool} New whitelist value
           */
          if ( ! $is_indexable && ! apply_filters( 'ep_prepare_meta_whitelist_key', false, $key, $post ) ) {
              continue;
          }

          $indexable_metas[ $key ] = $value;
      }

      return $indexable_metas;
  }
  /**
   * Return all distinct meta fields in the database.
   *
   * The list is limited to 800 keys, sorted, and cached in the `ep_meta_field_keys`
   * transient for a day.
   *
   * @since 4.4.0
   * @param bool $force_refresh Whether to use or not a cached value. Default false, use cached.
   * @return array
   */
  public function get_distinct_meta_field_keys_db( bool $force_refresh = false ): array {
      global $wpdb;

      /**
       * Short-circuits the process of getting distinct meta keys from the database.
       *
       * Returning a non-null value will effectively short-circuit the function.
       *
       * @since 4.4.0
       * @hook ep_post_pre_meta_keys_db
       * @param {null} $meta_keys Distinct meta keys array
       * @return {null|array} Distinct meta keys array or `null` to keep default behavior
       */
      $short_circuit = apply_filters( 'ep_post_pre_meta_keys_db', null );
      if ( null !== $short_circuit ) {
          return $short_circuit;
      }

      $cache_key = 'ep_meta_field_keys';

      // The transient is only read when a refresh was not requested.
      $cached = $force_refresh ? false : get_transient( $cache_key );
      if ( false !== $cached ) {
          /* this filter is documented below */
          return (array) apply_filters( 'ep_post_meta_keys_db', (array) json_decode( (string) $cached ) );
      }

      /**
       * To avoid running a too expensive SQL query, we run a query getting all public keys
       * and only the private keys allowed by the `ep_prepare_meta_allowed_protected_keys` filter.
       * This query does not order by on purpose, as that also brings a performance penalty.
       */
      $allowed_protected_keys = apply_filters( 'ep_prepare_meta_allowed_protected_keys', [], new \WP_Post( (object) [] ) );

      $protected_keys_clause = empty( $allowed_protected_keys )
          ? ''
          : ' OR meta_key IN ( ' . implode( ',', array_fill( 0, count( $allowed_protected_keys ), '%s' ) ) . ' ) ';

      // phpcs:disable WordPress.DB.DirectDatabaseQuery, WordPress.DB.PreparedSQL.InterpolatedNotPrepared, WordPress.DB.PreparedSQLPlaceholders.ReplacementsWrongNumber
      $meta_keys = $wpdb->get_col(
          $wpdb->prepare(
              "SELECT DISTINCT meta_key
FROM {$wpdb->postmeta}
WHERE meta_key NOT LIKE %s {$protected_keys_clause}
LIMIT 800",
              '\_%',
              ...$allowed_protected_keys
          )
      );
      // phpcs:enable WordPress.DB.DirectDatabaseQuery, WordPress.DB.PreparedSQL.InterpolatedNotPrepared, WordPress.DB.PreparedSQLPlaceholders.ReplacementsWrongNumber

      sort( $meta_keys );

      // Make sure the size of the transient will not be bigger than 1MB
      $encoded_keys = wp_json_encode( $meta_keys );
      while ( strlen( $encoded_keys ) >= MB_IN_BYTES ) {
          array_pop( $meta_keys );
          $encoded_keys = wp_json_encode( $meta_keys );
      }

      set_transient( $cache_key, $encoded_keys, DAY_IN_SECONDS );

      /**
       * Filter the distinct meta keys fetched from the database.
       *
       * @since 4.4.0
       * @hook ep_post_meta_keys_db
       * @param {array} $meta_keys Distinct meta keys array
       * @return {array} New distinct meta keys array
       */
      return (array) apply_filters( 'ep_post_meta_keys_db', $meta_keys );
  }
  /**
   * Return all distinct meta fields in the database per post type.
   *
   * Unless allowed via the `ep_post_meta_keys_db_per_post_type_allowed_screen` filter,
   * this method only works in the status report screen. The result is cached in a
   * transient (one per post type) for a day.
   *
   * @since 4.4.0
   * @param string $post_type     Post type slug
   * @param bool   $force_refresh Whether to use or not a cached value. Default false, use cached.
   * @return array
   */
  public function get_distinct_meta_field_keys_db_per_post_type( string $post_type, bool $force_refresh = false ): array {
      $allowed_screen = 'status-report' === \ElasticPress\Screen::factory()->get_current_screen();

      /**
       * Filter if the current screen is allowed or not to use the function.
       *
       * This method can be too resource intensive, use it with caution.
       *
       * @since 4.4.0
       * @hook ep_post_meta_keys_db_per_post_type_allowed_screen
       * @param {bool} $allowed_screen Whether this is an allowed screen or not.
       * @return {bool} New value of $allowed_screen
       */
      if ( ! apply_filters( 'ep_post_meta_keys_db_per_post_type_allowed_screen', $allowed_screen ) ) {
          _doing_it_wrong(
              __METHOD__,
              // Pass the plugin text domain, like the other translatable strings of this class.
              esc_html__( 'This method should not be called outside specific pages. Use the `ep_post_meta_keys_db_per_post_type_allowed_screen` filter if you need to use it in your custom screen.', 'elasticpress' ),
              'ElasticPress 4.4.0'
          );
          return [];
      }

      /**
       * Short-circuits the process of getting distinct meta keys from the database per post type.
       *
       * Returning a non-null value will effectively short-circuit the function.
       *
       * @since 4.4.0
       * @hook ep_post_pre_meta_keys_db_per_post_type
       * @param {null}   $meta_keys Distinct meta keys array
       * @param {string} $post_type Post type slug
       * @return {null|array} Distinct meta keys array or `null` to keep default behavior
       */
      $pre_meta_keys = apply_filters( 'ep_post_pre_meta_keys_db_per_post_type', null, $post_type );
      if ( null !== $pre_meta_keys ) {
          return $pre_meta_keys;
      }

      $cache_key = 'ep_meta_field_keys_' . $post_type;

      if ( ! $force_refresh ) {
          $cached = get_transient( $cache_key );
          if ( false !== $cached ) {
              $cached = (array) json_decode( (string) $cached );
              /* this filter is documented below */
              return (array) apply_filters( 'ep_post_meta_keys_db_per_post_type', $cached, $post_type );
          }
      }

      // Collect the keys batch by batch, as the post IDs are provided lazily.
      $meta_keys        = [];
      $post_ids_batches = $this->get_lazy_post_type_ids( $post_type );
      foreach ( $post_ids_batches as $post_ids ) {
          $new_meta_keys = $this->get_meta_keys_from_post_ids( $post_ids );

          $meta_keys = array_unique( array_merge( $meta_keys, $new_meta_keys ) );
      }

      // Make sure the size of the transient will not be bigger than 1MB
      do {
          $transient_size = strlen( wp_json_encode( $meta_keys ) );

          if ( $transient_size >= MB_IN_BYTES ) {
              array_pop( $meta_keys );
          } else {
              break;
          }
      } while ( true );

      set_transient( $cache_key, wp_json_encode( $meta_keys ), DAY_IN_SECONDS );

      /**
       * Filter the distinct meta keys fetched from the database per post type.
       *
       * @since 4.4.0
       * @hook ep_post_meta_keys_db_per_post_type
       * @param {array}  $meta_keys Distinct meta keys array
       * @param {string} $post_type Post type slug
       * @return {array} New distinct meta keys array
       */
      return (array) apply_filters( 'ep_post_meta_keys_db_per_post_type', $meta_keys, $post_type );
  }
  /**
   * Return the meta keys of a post type that will (possibly) be indexed.
   *
   * This function gets the distinct meta keys found in the database for the post type,
   * creates a fake post of that type with all those meta fields, runs the
   * `ep_prepare_meta_data` filter against it and checks if meta keys are allowed or not.
   *
   * @since 4.4.0
   * @param string $post_type     Post type slug
   * @param bool   $force_refresh Whether to use or not a cached value. Default false, use cached.
   * @return array
   */
  public function get_indexable_meta_keys_per_post_type( string $post_type, bool $force_refresh = false ): array {
      $mock_post = new \WP_Post( (object) [ 'post_type' => $post_type ] );
      $meta_keys = $this->get_distinct_meta_field_keys_db_per_post_type( $post_type, $force_refresh );

      // Use the same (filterable) placeholder value as get_predicted_indexable_meta_keys().
      $fake_meta_values = array_fill_keys( $meta_keys, $this->get_test_meta_value() );
      $filtered_meta    = apply_filters( 'ep_prepare_meta_data', $fake_meta_values, $mock_post );

      return array_filter(
          array_keys( $filtered_meta ),
          function ( $meta_key ) use ( $mock_post ) {
              return $this->is_meta_allowed( $meta_key, $mock_post );
          }
      );
  }
  /**
   * Return the meta keys that will (possibly) be indexed.
   *
   * All the meta keys found in the database are attached, with a test value, to a fake post
   * without a type. That set goes through the `ep_prepare_meta_data` filter and each
   * remaining key is then checked with `is_meta_allowed()`.
   * Although it provides a good indicator, it is not 100% correct as developers could create
   * code using the `ep_prepare_meta_data` filter that would depend on "real" data.
   *
   * @since 4.4.0
   * @param bool $force_refresh Whether to use or not a cached value. Default false, use cached.
   * @return array
   */
  public function get_predicted_indexable_meta_keys( bool $force_refresh = false ): array {
      $fake_post = new \WP_Post( (object) [] );

      // Every key found in the database gets the same placeholder value.
      $db_meta_keys = $this->get_distinct_meta_field_keys_db( $force_refresh );
      $fake_metas   = array_fill_keys( $db_meta_keys, $this->get_test_meta_value() );

      $prepared_metas = apply_filters( 'ep_prepare_meta_data', $fake_metas, $fake_post );

      $predicted_keys = [];
      foreach ( array_keys( $prepared_metas ) as $position => $meta_key ) {
          if ( $this->is_meta_allowed( $meta_key, $fake_post ) ) {
              $predicted_keys[ $position ] = $meta_key;
          }
      }

      sort( $predicted_keys );

      return $predicted_keys;
  }
  /**
   * Return the value used to fill meta fields while predicting indexable content.
   *
   * The value defaults to `test-value`, can be changed with the `ep_post_test_meta_value`
   * filter and is always returned as a string.
   *
   * @since 5.1.0
   * @return string
   */
  public function get_test_meta_value(): string {
      /**
       * Filter the value used to fill meta fields while predicting indexable content.
       *
       * @hook ep_post_test_meta_value
       * @since 5.1.0
       * @param {string} $test_meta_value The test meta value. Default: test-value
       * @return {string} New test meta value
       */
      $test_meta_value = apply_filters( 'ep_post_test_meta_value', 'test-value' );

      return (string) $test_meta_value;
  }
  /**
   * Given a post type, *yields* their Post IDs.
   *
   * As this function contains `yield`, calling it returns a PHP Generator. To avoid
   * timeouts, by default it yields at most 8 batches of up to 11,000 IDs each. Both numbers
   * can be changed with filters.
   *
   * @since 4.4.0
   * @see https://www.php.net/manual/en/language.generators.overview.php
   * @param string $post_type The post type slug
   * @return iterator
   */
  protected function get_lazy_post_type_ids( string $post_type ) {
      global $wpdb;

      $count_sql = $wpdb->prepare(
          "SELECT count(*) FROM {$wpdb->posts} WHERE post_type = %s",
          $post_type
      );
      $total     = $wpdb->get_var( $count_sql ); // phpcs:ignore WordPress.DB.DirectDatabaseQuery, WordPress.DB.PreparedSQL.NotPrepared

      // Nothing to yield for a post type without posts.
      if ( ! $total ) {
          return [];
      }

      /**
       * Filter the number of IDs to be fetched per page to discover distinct meta fields per post type.
       *
       * @hook ep_post_meta_by_type_ids_per_page
       * @since 4.4.0
       * @param {int}    $per_page  Number of IDs
       * @param {string} $post_type The post type slug
       * @return {string} New number of IDs
       */
      $per_page = apply_filters( 'ep_post_meta_by_type_ids_per_page', 11000, $post_type );

      /**
       * Filter the number of times EP will fetch IDs from the database
       *
       * @hook ep_post_meta_by_type_number_of_pages
       * @since 4.4.0
       * @param {int}    $pages     Number of "pages" (not WP post type)
       * @param {int}    $per_page  Number of IDs per page
       * @param {string} $post_type The post type slug
       * @return {string} New number of pages
       */
      $pages = apply_filters(
          'ep_post_meta_by_type_number_of_pages',
          min( ceil( $total / $per_page ), 8 ),
          $per_page,
          $post_type
      );

      $page = 0;
      while ( $page < $pages ) {
          $ids_sql = $wpdb->prepare(
              "SELECT ID FROM {$wpdb->posts} WHERE post_type = %s LIMIT %d, %d",
              $post_type,
              $per_page * $page,
              $per_page
          );

          yield $wpdb->get_col( $ids_sql ); // phpcs:ignore WordPress.DB.DirectDatabaseQuery, WordPress.DB.PreparedSQL.NotPrepared

          ++$page;
      }
  }
  /**
   * Given a set of post IDs, return distinct meta keys associated with them.
   *
   * An empty set of IDs returns an empty array without querying the database.
   *
   * @since 4.4.0
   * @param array $post_ids Set of post IDs
   * @return array
   */
  protected function get_meta_keys_from_post_ids( array $post_ids ): array {
      global $wpdb;

      if ( empty( $post_ids ) ) {
          return [];
      }

      // One `%d` placeholder per post ID, separated by commas.
      $id_placeholders = rtrim( str_repeat( '%d,', count( $post_ids ) ), ',' );

      // phpcs:disable WordPress.DB.DirectDatabaseQuery, WordPress.DB.PreparedSQL.InterpolatedNotPrepared, WordPress.DB.PreparedSQLPlaceholders.UnfinishedPrepare
      return $wpdb->get_col(
          $wpdb->prepare(
              "SELECT DISTINCT meta_key FROM {$wpdb->postmeta} WHERE post_id IN ( {$id_placeholders} )",
              $post_ids
          )
      );
      // phpcs:enable WordPress.DB.DirectDatabaseQuery, WordPress.DB.PreparedSQL.InterpolatedNotPrepared, WordPress.DB.PreparedSQLPlaceholders.UnfinishedPrepare
  }
  /**
   * Add a `term_suggest` field to the mapping.
   *
   * The field uses the same type as the `post_content` field. For Elasticsearch versions
   * lower than 7.0 the properties are under `mappings.post`, otherwise directly under
   * `mappings`.
   *
   * This method assumes the `edge_ngram_analyzer` analyzer was already added to the mapping.
   *
   * @since 4.5.0
   * @param array $mapping The mapping array
   * @return array
   */
  public function add_term_suggest_field( array $mapping ): array {
      $es_version         = (string) Elasticsearch::factory()->get_elasticsearch_version();
      $has_type_level     = version_compare( $es_version, '7.0', '<' );
      $mapping_properties = $has_type_level
          ? $mapping['mappings']['post']['properties']
          : $mapping['mappings']['properties'];

      $mapping_properties['term_suggest'] = array(
          'type'            => $mapping_properties['post_content']['type'],
          'analyzer'        => 'edge_ngram_analyzer',
          'search_analyzer' => 'standard',
      );

      // Write the updated properties back to where they were read from.
      if ( $has_type_level ) {
          $mapping['mappings']['post']['properties'] = $mapping_properties;
      } else {
          $mapping['mappings']['properties'] = $mapping_properties;
      }

      return $mapping;
  }
  2597. /**
 * Return all meta keys selected in the Weighting Dashboard plus all keys allowed via code.
  2599. *
  2600. * @since 5.1.4
  2601. * @return array
  2602. */
  2603. public function get_all_allowed_metas_manual(): array {
  2604. $post_types = \ElasticPress\Indexables::factory()->get( 'post' )->get_indexable_post_types();
  2605. $search_feature = \ElasticPress\Features::factory()->get_registered_feature( 'search' );
  2606. $weighting = $search_feature->weighting->get_weighting_configuration_with_defaults();
  2607. $fake_post = new \WP_Post( new \stdClass() );
  2608. $all_allowed_metas = [];
  2609. foreach ( $post_types as $post_type ) {
  2610. $fake_post->post_type = $post_type;
  2611. $allowed_protected_keys = apply_filters( 'ep_prepare_meta_allowed_protected_keys', [], $fake_post );
  2612. $selected_keys = [];
  2613. if ( ! empty( $weighting[ $post_type ] ) ) {
  2614. $selected_keys = array_map(
  2615. function ( $field ) {
  2616. if ( false === strpos( $field, 'meta.' ) ) {
  2617. return null;
  2618. }
  2619. $field_name_parts = explode( '.', $field );
  2620. return $field_name_parts[1];
  2621. },
  2622. array_keys( $weighting[ $post_type ] )
  2623. );
  2624. $selected_keys = array_filter( $selected_keys );
  2625. }
  2626. $allowed_keys = apply_filters( 'ep_prepare_meta_allowed_keys', array_merge( $allowed_protected_keys, $selected_keys ), $fake_post );
  2627. $all_allowed_metas = array_merge( $all_allowed_metas, $allowed_keys );
  2628. }
  2629. return array_unique( $all_allowed_metas );
  2630. }
  2631. }