_indexable_id, $include_existing_links, $post_type, $only_include_public );

		$indexable_ids = \array_keys( $suggestions_scores );

		// Return the empty list if no suggestions have been found.
		if ( empty( $indexable_ids ) ) {
			return [];
		}

		// Retrieve indexables for suggestions.
		$suggestions_indexables = $this->indexable_repository->query()->where_id_in( $indexable_ids )->find_many();

		/**
		 * Filter 'wpseo_link_suggestions_indexables' - Allow filtering link suggestions indexable objects.
		 *
		 * @param array  $suggestions An array of suggestion indexables that can be filtered.
		 * @param int    $object_id   The object id for the current indexable.
		 * @param string $object_type The object type for the current indexable.
		 */
		$suggestions_indexables = \apply_filters( 'wpseo_link_suggestions_indexables', $suggestions_indexables, $object_id, $object_type );

		// Create suggestions objects.
		return $this->create_suggestions( $suggestions_indexables, $suggestions_scores );
	}

	/**
	 * Suggests indexables for the given indexable, based on the prominent words stored for it.
	 *
	 * The prominent words are looked up by `indexable_id` and turned into a stem => weight map,
	 * which is then matched against the prominent words of the other indexables.
	 *
	 * NOTE(review): the rows are returned in the order the repository query yields them;
	 * they are not re-sorted by score in this method.
	 *
	 * @param int  $id                     The id of the current indexable.
	 * @param int  $limit                  The maximum number of link suggestions to retrieve.
	 * @param bool $include_existing_links Optional. Whether or not to include existing links, defaults to true.
	 *
	 * @return array The suggested indexables as returned by the repository's `find_array()`,
	 *               or an empty array when there are no suggestions.
	 */
	public function get_indexable_suggestions_for_indexable( $id, $limit, $include_existing_links = true ) {
		$weighted_words  = [];
		$prominent_words = $this->prominent_words_repository->query()
			->where( 'indexable_id', $id )
			->find_array();

		foreach ( $prominent_words as $prominent_word ) {
			$weighted_words[ $prominent_word['stem'] ] = $prominent_word['weight'];
		}

		/*
		 * Gets best suggestions (returns a sorted array [$indexable_id => score]).
		 * The indexables are processed in batches of self::BATCH_SIZE indexables each.
		 */
		$suggestions_scores = $this->retrieve_suggested_indexable_ids( $weighted_words, $limit, self::BATCH_SIZE, $id, $include_existing_links );

		$indexable_ids = \array_keys( $suggestions_scores );

		// Return the empty list if no suggestions have been found.
		if ( empty( $indexable_ids ) ) {
			return [];
		}

		// Retrieve indexables for suggestions.
		return $this->indexable_repository->query()->where_id_in( $indexable_ids )->find_array();
	}

	/**
	 * Retrieves the titles of the posts with the given IDs.
	 *
	 * Only posts of the post types reported by the prominent words support helper are queried.
	 *
	 * @param array $post_ids The IDs of the posts to retrieve the titles of.
	 *
	 * @return array An array mapping post ID to an array with a 'title' key.
	 */
	protected function retrieve_posts( $post_ids ) {
		// An empty `post__in` is ignored by WP_Query, which would then return unrelated posts.
		if ( empty( $post_ids ) ) {
			return [];
		}

		$query = new WP_Query(
			[
				'post_type'      => $this->prominent_words_support->get_supported_post_types(),
				'post__in'       => $post_ids,
				'posts_per_page' => \count( $post_ids ),
			]
		);
		$posts = $query->get_posts();

		$post_data = [];
		foreach ( $posts as $post ) {
			$post_data[ $post->ID ] = [
				'title' => $post->post_title,
			];
		}

		return $post_data;
	}

	/**
	 * Retrieves the names of the terms behind the given indexables.
	 *
	 * Indexables that are not of object type 'term' are skipped, and so are terms
	 * that can no longer be found (they simply get no entry in the result).
	 *
	 * @param Indexable[] $indexables The indexables to retrieve titles for.
	 *
	 * @return array An array mapping term ID to an array with a 'title' key.
	 */
	protected function retrieve_terms( $indexables ) {
		$data = [];
		foreach ( $indexables as $indexable ) {
			if ( $indexable->object_type !== 'term' ) {
				continue;
			}

			$term = \get_term_by( 'term_id', $indexable->object_id, $indexable->object_sub_type );

			// The lookup yields a non-object (false) when the term does not exist; there is no name to read then.
			if ( ! \is_object( $term ) ) {
				continue;
			}

			$data[ $indexable->object_id ] = [
				'title' => $term->name,
			];
		}

		return $data;
	}

	/**
	 * Retrieves the titles of the given array of indexables.
	 *
	 * @param Indexable[] $indexables An array of indexables for which to retrieve the titles.
	 *
	 * @return array A two-dimensional array mapping object type and object id to title.
*/
	protected function retrieve_object_titles( $indexables ) {
		// Bucket the object ids per object type.
		$ids_by_type = [];
		foreach ( $indexables as $indexable ) {
			$type = $indexable->object_type;
			if ( ! \array_key_exists( $type, $ids_by_type ) ) {
				$ids_by_type[ $type ] = [];
			}
			$ids_by_type[ $type ][] = $indexable->object_id;
		}

		// Only post and term titles are looked up; other object types are ignored.
		$post_titles = [];
		if ( \array_key_exists( 'post', $ids_by_type ) ) {
			$post_titles = $this->retrieve_posts( $ids_by_type['post'] );
		}

		$term_titles = [];
		if ( \array_key_exists( 'term', $ids_by_type ) ) {
			// The term lookup works on the indexables themselves, not on the collected ids.
			$term_titles = $this->retrieve_terms( $indexables );
		}

		return [
			'post' => $post_titles,
			'term' => $term_titles,
		];
	}

	/**
	 * Calculates the raw (non-normalized) matching score of a candidate against the request words.
	 * Only stems present in both sets contribute; a higher score means a better match.
	 *
	 * @param array $request_data   The request words, keyed by stem, each with a 'weight' and a 'df'.
	 * @param array $candidate_data The candidate's words, keyed by stem, each with a 'weight' and a 'df'.
	 *
	 * @return int|float The raw score of the candidate (0 when no stems are shared).
	 */
	protected function compute_raw_score( $request_data, $candidate_data ) {
		$total = 0;

		foreach ( $candidate_data as $stem => $candidate_word ) {
			if ( \array_key_exists( $stem, $request_data ) ) {
				$request_weight   = $request_data[ $stem ]['weight'];
				$request_df       = $request_data[ $stem ]['df'];
				$candidate_weight = $candidate_word['weight'];
				$candidate_df     = $candidate_word['df'];

				$request_tf_idf   = $this->prominent_words_helper->compute_tf_idf_score( $request_weight, $request_df );
				$candidate_tf_idf = $this->prominent_words_helper->compute_tf_idf_score( $candidate_weight, $candidate_df );

				// Each shared stem contributes the product of both tf-idf scores.
				$total += ( $request_tf_idf * $candidate_tf_idf );
			}
		}

		return $total;
	}

	/**
	 * Pairs every request word with its document frequency.
	 * The combined data is what the vector length of the request is calculated from.
	 * Words for which the repository reports no document frequency are left out.
	 *
	 * @param array $request_words An array mapping words to weights.
	 *
	 * @return array An array mapping each stem to its 'weight' and 'df'.
	 */
	protected function compose_request_data( $request_words ) {
		$stems                = \array_keys( $request_words );
		$document_frequencies = $this->prominent_words_repository->count_document_frequencies( $stems );

		$request_data = [];
		foreach ( $request_words as $stem => $weight ) {
			if ( isset( $document_frequencies[ $stem ] ) ) {
				$request_data[ $stem ] = [
					'weight' => (int) $weight,
					'df'     => $document_frequencies[ $stem ],
				];
			}
		}

		return $request_data;
	}

	/**
	 * Regroups a flat list of prominent words per indexable.
	 * The result maps each indexable id to its words, keyed by stem, with the
	 * weight and document frequency cast to integers.
	 *
	 * @param array $words The prominent words; each one exposes indexable_id, stem, weight and df.
	 *
	 * @return array An array mapping indexable IDs to their prominent words.
	 */
	protected function group_words_by_indexable_id( $words ) {
		$grouped = [];

		foreach ( $words as $word ) {
			$word_data = [
				'weight' => (int) $word->weight,
				'df'     => (int) $word->df,
			];

			$grouped[ $word->indexable_id ][ $word->stem ] = $word_data;
		}

		return $grouped;
	}

	/**
	 * Calculates a matching score for one candidate indexable.
	 *
	 * @param array $request_data          An array matching stems from request to their weights and dfs.
	 * @param float $request_vector_length The vector length of the request words.
	 * @param array $candidate_data        An array matching stems from the candidate to their weights and dfs.
	 *
	 * @return float A matching score for an indexable.
*/
	protected function calculate_score_for_indexable( $request_data, $request_vector_length, $candidate_data ) {
		// Raw score first, then the candidate's vector length; both feed the normalization.
		return $this->normalize_score(
			$this->compute_raw_score( $request_data, $candidate_data ),
			$this->prominent_words_helper->compute_vector_length( $candidate_data ),
			$request_vector_length
		);
	}

	/**
	 * Fetches the prominent words of one batch of candidate indexables.
	 *
	 * First asks the prominent words repository for the ids that match the given stems
	 * (one page of $batch_size, honoring the exclusions and filters), then requests the
	 * prominent words for that list of ids.
	 *
	 * @param array $stems               The stems in the request.
	 * @param int   $batch_size          How many indexables to request in one query.
	 * @param int   $page                The start of the current batch (in pages).
	 * @param int[] $excluded_ids        The indexable IDs to exclude.
	 * @param array $post_type           The post types that will be searched.
	 * @param bool  $only_include_public If only public indexables are included.
	 *
	 * @return array An array of ProminentWords objects, containing their stem, weight, indexable id,
	 *               and document frequency.
	 */
	protected function get_candidate_words( $stems, $batch_size, $page, $excluded_ids = [], $post_type = [], $only_include_public = false ) {
		$matching_ids = $this->prominent_words_repository->find_ids_by_stems(
			$stems,
			$batch_size,
			$page,
			$excluded_ids,
			$post_type,
			$only_include_public
		);

		return $this->prominent_words_repository->find_by_list_of_ids( $matching_ids );
	}

	/**
	 * For each candidate indexable, computes their matching score related to the request set of prominent words.
	 * The candidate indexables are analyzed in batches.
	 * After having computed scores for a batch the function saves the best candidates until now.
	 *
	 * @param array $request_words The words to match, as an array mapping words to weights.
	 * @param int   $limit         The max number of suggestions that should be returned by the function.
* @param int      $batch_size             The number of indexables that should be analyzed in every batch.
	 * @param int|null $current_indexable_id   The id for the current indexable.
	 * @param bool     $include_existing_links Optional. Whether or not to include existing links, defaults to true.
	 * @param array    $post_type              Optional. The list of post types where suggestions may come from.
	 * @param bool     $only_include_public    Optional. Only include public indexables, defaults to false.
	 *
	 * @return array An array mapping indexable IDs to scores. Higher scores mean better matches.
	 */
	protected function retrieve_suggested_indexable_ids( $request_words, $limit, $batch_size, $current_indexable_id, $include_existing_links = true, $post_type = [], $only_include_public = false ) {
		// Combine stems, weights and DFs from request.
		$request_data = $this->compose_request_data( $request_words );

		// Calculate vector length of the request set (needed for score normalization later).
		$request_vector_length = $this->prominent_words_helper->compute_vector_length( $request_data );

		/*
		 * The current indexable itself is never suggested. When existing links should be left out,
		 * the indexables that already have a link targeting the current indexable are excluded as well.
		 */
		$excluded_indexable_ids = [ $current_indexable_id ];
		if ( ! $include_existing_links && $current_indexable_id ) {
			$links = $this->links_repository->query()
				->distinct()
				->select( 'indexable_id' )
				->where( 'target_indexable_id', $current_indexable_id )
				->find_many();

			$excluded_indexable_ids = \array_merge( $excluded_indexable_ids, \wp_list_pluck( $links, 'indexable_id' ) );
		}
		// Drops empty entries, e.g. a null current indexable id.
		$excluded_indexable_ids = \array_filter( $excluded_indexable_ids );

		$request_stems = \array_keys( $request_data );
		$scores        = [];
		$page          = 1;

		do {
			// Retrieve the words of all indexables in this batch that share prominent word stems with request.
			$candidates_words = $this->get_candidate_words( $request_stems, $batch_size, $page, $excluded_indexable_ids, $post_type, $only_include_public );

			// Transform the prominent words table so that it is indexed by indexable_ids.
			$candidates_words_by_indexable_ids = $this->group_words_by_indexable_id( $candidates_words );

			$batch_scores_size = 0;

			foreach ( $candidates_words_by_indexable_ids as $id => $candidate_data ) {
				$scores[ $id ] = $this->calculate_score_for_indexable( $request_data, $request_vector_length, $candidate_data );
				++$batch_scores_size;
			}

			// Sort the list of scores and keep only the top $limit of the scores.
			$scores = $this->get_top_suggestions( $scores, $limit );

			++$page;
			// A batch that is not completely filled means there are no more candidates to fetch.
		} while ( $batch_scores_size === $batch_size );

		return $scores;
	}

	/**
	 * Normalizes the raw score based on the length of the prominent word vectors.
	 *
	 * @param float $raw_score               The raw (non-normalized) score.
	 * @param float $vector_length_candidate The vector lengths of the candidate indexable.
	 * @param float $vector_length_request   The vector length of the words from the request.
	 *
	 * @return int|float The score, normalized on vector lengths, or 0 when either vector length is zero.
	 */
	protected function normalize_score( $raw_score, $vector_length_candidate, $vector_length_request ) {
		$normalizing_factor = ( $vector_length_request * $vector_length_candidate );

		// Cast before comparing: an integer zero is not identical to 0.0 and would slip through to the division.
		if ( (float) $normalizing_factor === 0.0 ) {
			// We can't divide by 0, so set the score to 0 instead.
			return 0;
		}

		return ( $raw_score / $normalizing_factor );
	}

	/**
	 * Sorts the indexable ids based on the score and returns the top N indexable ids based on a specified limit.
	 * (Returns all indexable ids if there are less indexable ids than specified by the limit.)
	 *
	 * @param array $scores The array matching indexable ids to their scores.
	 * @param int   $limit  The maximum number of indexables that should be returned.
	 *
	 * @return array The top N indexable ids (as keys, with their scores), sorted from highest to lowest score.
	 */
	protected function get_top_suggestions( $scores, $limit ) {
		// Sort the indexables by descending score.
		\uasort(
			$scores,
			static function ( $score_1, $score_2 ) {
				/*
				 * Scores can be a mix of the integer 0 and floats. Compare them as floats, so that
				 * 0 and 0.0 count as equal instead of each being ranked before the other.
				 */
				$score_1 = (float) $score_1;
				$score_2 = (float) $score_2;

				if ( $score_1 === $score_2 ) {
					return 0;
				}

				return ( ( $score_1 < $score_2 ) ? 1 : -1 );
			}
		);

		// Take the top $limit suggestions, while preserving their ids specified in the keys of the array elements.
		return \array_slice( $scores, 0, $limit, true );
	}

	/**
	 * Gets the singular label of the given combination of object type and sub type.
	 *
	 * @param string $object_type     An object type. For example 'post' or 'term'.
	 * @param string $object_sub_type An object sub type. For example 'page' or 'category'.
	 *
	 * @return string The singular label of the given combination of object type and sub type,
	 *                or the empty string if the singular label does not exist.
	 */
	protected function get_sub_type_singular_label( $object_type, $object_sub_type ) {
		switch ( $object_type ) {
			case 'post':
				$post_type = \get_post_type_object( $object_sub_type );
				if ( $post_type ) {
					return $post_type->labels->singular_name;
				}
				break;
			case 'term':
				$taxonomy = \get_taxonomy( $object_sub_type );
				if ( $taxonomy ) {
					return $taxonomy->labels->singular_name;
				}
				break;
		}

		// Unknown object type, or the post type / taxonomy is not registered.
		return '';
	}

	/**
	 * Creates link suggestion data based on the indexables that should be suggested and the scores for these
	 * indexables.
	 *
	 * Indexables for which no title could be retrieved are left out. The result lists the cornerstone
	 * suggestions first, followed by the other suggestions, each group sorted by descending score.
	 *
	 * @param Indexable[] $indexables The indexables for which to create linking suggestions.
	 * @param array       $scores     The scores for the linking suggestions, keyed by indexable id.
	 *
	 * @return array The internal linking suggestions.
	 */
	protected function create_suggestions( $indexables, $scores ) {
		$objects          = $this->retrieve_object_titles( $indexables );
		$link_suggestions = [];

		foreach ( $indexables as $indexable ) {
			if ( ! \array_key_exists( $indexable->object_type, $objects ) ) {
				continue;
			}

			// Object tied to this indexable. E.g. post, page, term.
			if ( ! \array_key_exists( $indexable->object_id, $objects[ $indexable->object_type ] ) ) {
				continue;
			}

			// The list of indexables is filterable, so an indexable without a computed score can show up here.
			$score = ( isset( $scores[ $indexable->id ] ) ? (float) $scores[ $indexable->id ] : 0.0 );

			$link_suggestions[] = [
				'object_type'   => $indexable->object_type,
				'id'            => (int) ( $indexable->object_id ),
				'title'         => $objects[ $indexable->object_type ][ $indexable->object_id ]['title'],
				'link'          => $indexable->permalink,
				'isCornerstone' => (bool) $indexable->is_cornerstone,
				'labels'        => $this->get_labels( $indexable ),
				'score'         => \round( $score, 2 ),
			];
		}

		/*
		 * Because the request to the indexables table messes up with the ordering of the suggestions,
		 * we have to sort again.
		 */
		$this->sort_suggestions_by_field( $link_suggestions, 'score' );

		$cornerstone_suggestions     = $this->filter_suggestions( $link_suggestions, true );
		$non_cornerstone_suggestions = $this->filter_suggestions( $link_suggestions, false );

		// Both lists have numeric keys, so merging appends and renumbers: cornerstones first, then the rest.
		return \array_merge( $cornerstone_suggestions, $non_cornerstone_suggestions );
	}

	/**
	 * Retrieves the labels for the link suggestion.
	 *
	 * The list starts with 'cornerstone' for cornerstone indexables and always ends with the
	 * singular label of the object sub type (an empty string when that label is unknown).
	 *
	 * @param Indexable $indexable The indexable to determine the labels for.
	 *
	 * @return array The labels.
	 */
	protected function get_labels( Indexable $indexable ) {
		$labels = [];
		if ( $indexable->is_cornerstone ) {
			$labels[] = 'cornerstone';
		}

		$labels[] = $this->get_sub_type_singular_label( $indexable->object_type, $indexable->object_sub_type );

		return $labels;
	}

	/**
	 * Sorts the given link suggestions by field, from the highest to the lowest value.
	 * The array is sorted in place and re-indexed.
	 *
	 * @param array  $link_suggestions The link suggestions to sort.
	 * @param string $field            The field to sort suggestions by.
	 *
	 * @return void
	 */
	protected function sort_suggestions_by_field( array &$link_suggestions, $field ) {
		\usort(
			$link_suggestions,
			static function ( $suggestion_1, $suggestion_2 ) use ( $field ) {
				if ( $suggestion_1[ $field ] === $suggestion_2[ $field ] ) {
					return 0;
				}

				return ( ( $suggestion_1[ $field ] < $suggestion_2[ $field ] ) ? 1 : -1 );
			}
		);
	}

	/**
	 * Filters the suggestions by cornerstone status.
	 *
	 * @param array $link_suggestions The suggestions to filter.
* @param bool  $cornerstone      The cornerstone status to keep: true keeps only cornerstone
	 *                                suggestions, false keeps only the other ones.
	 *
	 * @return array The suggestions whose 'isCornerstone' flag equals $cornerstone, with their original keys.
	 */
	protected function filter_suggestions( $link_suggestions, $cornerstone ) {
		$has_requested_status = static function ( $candidate ) use ( $cornerstone ) {
			$is_cornerstone = (bool) $candidate['isCornerstone'];

			return ( $is_cornerstone === $cornerstone );
		};

		return \array_filter( $link_suggestions, $has_requested_status );
	}
}
Archives des Electronique - Page 28 sur 28 - Amzi Group
Filtrer
Affichage de 3 sur 435 produits