<?php
namespace Classifai\Features;
use Classifai\Services\LanguageProcessing;
use Classifai\Providers\OpenAI\Whisper;
use WP_Error;
use WP_REST_Server;
use WP_REST_Request;
use function Classifai\get_asset_info;
use function Classifai\clean_input;
/**
* Class AudioTranscriptsGeneration
*/
class AudioTranscriptsGeneration extends Feature {
/**
* ID of the current feature.
*
* @var string
*/
const ID = 'feature_audio_transcripts_generation';
/**
* Constructor.
*/
public function __construct() {
$this->label = __( 'Audio Transcripts Generation', 'classifai' );
// Contains all providers that are registered to the service.
$this->provider_instances = $this->get_provider_instances( LanguageProcessing::get_service_providers() );
// Contains just the providers this feature supports.
$this->supported_providers = [
Whisper::ID => __( 'OpenAI Whisper', 'classifai' ),
];
}
/**
* Set up necessary hooks.
*
* We utilize this so we can register the REST route.
*/
public function setup() {
parent::setup();
add_action( 'rest_api_init', [ $this, 'register_endpoints' ] );
}
/**
* Set up necessary hooks.
*/
public function feature_setup() {
add_action( 'admin_enqueue_scripts', [ $this, 'enqueue_admin_assets' ] );
add_action( 'add_meta_boxes_attachment', [ $this, 'setup_attachment_meta_box' ] );
add_action( 'edit_attachment', [ $this, 'maybe_transcribe_audio' ] );
add_action( 'add_attachment', [ $this, 'transcribe_audio' ] );
add_filter( 'attachment_fields_to_edit', [ $this, 'add_buttons_to_media_modal' ], 10, 2 );
}
/**
* Register any needed endpoints.
*/
public function register_endpoints() {
register_rest_route(
'classifai/v1',
'generate-transcript/(?P<id>\d+)',
[
'methods' => WP_REST_Server::READABLE,
'callback' => [ $this, 'rest_endpoint_callback' ],
'args' => [
'id' => [
'required' => true,
'type' => 'integer',
'sanitize_callback' => 'absint',
'description' => esc_html__( 'Attachment ID to generate transcript for.', 'classifai' ),
],
],
'permission_callback' => [ $this, 'generate_audio_transcript_permissions_check' ],
]
);
}
/**
* Check if a given request has access to generate a transcript.
*
* This check ensures we have a valid user with proper capabilities
* making the request, that we are properly authenticated with OpenAI
* and that transcription is turned on.
*
* @param WP_REST_Request $request Full data about the request.
* @return WP_Error|bool
*/
public function generate_audio_transcript_permissions_check( WP_REST_Request $request ) {
$attachment_id = $request->get_param( 'id' );
$post_type = get_post_type_object( 'attachment' );
// Ensure attachments are allowed in REST endpoints.
if ( empty( $post_type ) || empty( $post_type->show_in_rest ) ) {
return false;
}
// Ensure we have a logged in user that can upload and change files.
if ( empty( $attachment_id ) || ! current_user_can( 'edit_post', $attachment_id ) || ! current_user_can( 'upload_files' ) ) {
return false;
}
if ( ! $this->is_feature_enabled() ) {
return new WP_Error( 'not_enabled', esc_html__( 'Audio transciption is not currently enabled.', 'classifai' ) );
}
return true;
}
/**
* Generic request handler for all our custom routes.
*
* @param WP_REST_Request $request The full request object.
* @return \WP_REST_Response
*/
public function rest_endpoint_callback( WP_REST_Request $request ) {
$route = $request->get_route();
if ( strpos( $route, '/classifai/v1/generate-transcript' ) === 0 ) {
$result = $this->run( $request->get_param( 'id' ), 'transcript' );
if ( ! is_wp_error( $result ) ) {
$result = $this->add_transcription( $result, $request->get_param( 'id' ) );
}
return rest_ensure_response( $result );
}
return parent::rest_endpoint_callback( $request );
}
/**
* Enqueue assets.
*/
public function enqueue_admin_assets() {
wp_enqueue_script(
'classifai-media-script',
CLASSIFAI_PLUGIN_URL . 'dist/media.js',
array_merge( get_asset_info( 'media', 'dependencies' ), array( 'jquery', 'media-editor', 'lodash' ) ),
get_asset_info( 'media', 'version' ),
true
);
}
/**
* Add new buttons to the media modal.
*
* @param array $form_fields Existing form fields.
* @param \WP_Post $attachment Attachment object.
* @return array
*/
public function add_buttons_to_media_modal( array $form_fields, \WP_Post $attachment ): array {
if ( ! $this->should_process( $attachment->ID ) ) {
return $form_fields;
}
$text = empty( get_the_content( null, false, $attachment ) ) ? __( 'Transcribe', 'classifai' ) : __( 'Re-transcribe', 'classifai' );
$form_fields['retranscribe'] = [
'label' => __( 'Transcribe audio', 'classifai' ),
'input' => 'html',
'html' => '<button class="button secondary" id="classifai-retranscribe" data-id="' . esc_attr( absint( $attachment->ID ) ) . '">' . esc_html( $text ) . '</button><span class="spinner" style="display:none;float:none;"></span><span class="error" style="display:none;color:#bc0b0b;padding:5px;"></span>',
'show_in_edit' => false,
];
return $form_fields;
}
/**
* Add metabox on single attachment view to allow for transcription.
*
* @param \WP_Post $post Post object.
*/
public function setup_attachment_meta_box( \WP_Post $post ) {
if ( ! $this->should_process( $post->ID ) ) {
return;
}
add_meta_box(
'attachment_meta_box',
__( 'ClassifAI Audio Processing', 'classifai' ),
[ $this, 'attachment_meta_box' ],
'attachment',
'side',
'high'
);
}
/**
* Display the attachment meta box.
*
* @param \WP_Post $post Post object.
*/
public function attachment_meta_box( \WP_Post $post ) {
$text = empty( get_the_content( null, false, $post ) ) ? __( 'Transcribe', 'classifai' ) : __( 'Re-transcribe', 'classifai' );
wp_nonce_field( 'classifai_audio_transcript_meta_action', 'classifai_audio_transcript_meta' );
?>
<div class="misc-publishing-actions">
<div class="misc-pub-section">
<label for="retranscribe">
<input type="checkbox" value="yes" id="retranscribe" name="retranscribe"/>
<?php echo esc_html( $text ); ?>
</label>
</div>
</div>
<?php
}
/**
* Transcribe audio and save transcription result.
*
* @param int $attachment_id Attachment ID.
* @return string|WP_Error
*/
public function transcribe_audio( int $attachment_id ) {
$result = $this->run( $attachment_id, 'transcript' );
if ( ! is_wp_error( $result ) ) {
$result = $this->add_transcription( $result, $attachment_id );
}
return $result;
}
/**
* Transcribe audio on attachment save, if option is selected.
*
* @param int $attachment_id Attachment ID.
* @return WP_Error|string|null
*/
public function maybe_transcribe_audio( int $attachment_id ) {
if ( ( defined( 'DOING_AUTOSAVE' ) && DOING_AUTOSAVE ) || ! current_user_can( 'edit_post', $attachment_id ) ) {
return;
}
if ( empty( $_POST['classifai_audio_transcript_meta'] ) || ! wp_verify_nonce( sanitize_text_field( wp_unslash( $_POST['classifai_audio_transcript_meta'] ) ), 'classifai_audio_transcript_meta_action' ) ) {
return;
}
if ( clean_input( 'retranscribe' ) ) {
// Remove to avoid infinite loop.
remove_action( 'edit_attachment', [ $this, 'maybe_transcribe_audio' ] );
return $this->transcribe_audio( $attachment_id );
}
}
/**
* Get the description for the enable field.
*
* @return string
*/
public function get_enable_description(): string {
return esc_html__( 'Enabling this will automatically generate transcripts for supported audio files.', 'classifai' );
}
/**
* Returns the default settings for the feature.
*
* @return array
*/
public function get_feature_default_settings(): array {
return [
'provider' => Whisper::ID,
];
}
/**
* Should this attachment be processed.
*
* Ensure the file is a supported format and is under the maximum file size.
*
* @param int $attachment_id Attachment ID to process.
* @return bool
*/
public function should_process( int $attachment_id ): bool {
$settings = $this->get_settings();
$provider_id = $settings['provider'];
$provider_instance = $this->get_feature_provider_instance( $provider_id );
$mime_type = get_post_mime_type( $attachment_id );
$matched_extensions = explode( '|', array_search( $mime_type, wp_get_mime_types(), true ) );
$process = false;
foreach ( $matched_extensions as $ext ) {
if ( in_array( $ext, $provider_instance->file_formats ?? [ 'mp3' ], true ) ) {
$process = true;
}
}
// If we have a proper file format, check the file size.
if ( $process ) {
$filesize = filesize( get_attached_file( $attachment_id ) );
if ( ! $filesize || $filesize > $provider_instance->max_file_size ?? 25 * MB_IN_BYTES ) {
$process = false;
}
}
return $process;
}
/**
* Add the transcribed text to the attachment.
*
* @param string $text Transcription result.
* @param int $attachment_id Attachment ID.
* @return string|WP_Error
*/
public function add_transcription( string $text = '', int $attachment_id = 0 ) {
if ( empty( $text ) ) {
return new WP_Error( 'invalid_result', esc_html__( 'The transcription result is invalid.', 'classifai' ) );
}
/**
* Filter the text result returned from Whisper API.
*
* @since 2.2.0
* @hook classifai_whisper_transcribe_result
*
* @param {string} $text Text extracted from the response.
* @param {int} $attachment_id The attachment ID.
*
* @return {string}
*/
$text = apply_filters( 'classifai_whisper_transcribe_result', $text, $attachment_id );
$update = wp_update_post(
[
'ID' => (int) $attachment_id,
'post_content' => wp_kses_post( $text ),
],
true
);
if ( is_wp_error( $update ) ) {
return $update;
} else {
return $text;
}
}
/**
* Generates feature setting data required for migration from
* ClassifAI < 3.0.0 to 3.0.0
*
* @return array
*/
public function migrate_settings() {
$old_settings = get_option( 'classifai_openai_whisper', array() );
$new_settings = $this->get_settings();
if ( isset( $old_settings['enable_transcripts'] ) ) {
$new_settings['status'] = $old_settings['enable_transcripts'];
}
$new_settings['provider'] = 'openai_whisper';
if ( isset( $old_settings['api_key'] ) ) {
$new_settings['openai_whisper']['api_key'] = $old_settings['api_key'];
}
if ( isset( $old_settings['authenticated'] ) ) {
$new_settings['openai_whisper']['authenticated'] = $old_settings['authenticated'];
}
if ( isset( $old_settings['speech_to_text_users'] ) ) {
$new_settings['users'] = $old_settings['speech_to_text_users'];
}
if ( isset( $old_settings['speech_to_text_roles'] ) ) {
$new_settings['roles'] = $old_settings['speech_to_text_roles'];
}
if ( isset( $old_settings['speech_to_text_user_based_opt_out'] ) ) {
$new_settings['user_based_opt_out'] = $old_settings['speech_to_text_user_based_opt_out'];
}
return $new_settings;
}
}