Source: Providers/Localhost/OllamaMultimodal.php

  1. <?php
  2. /**
  3. * Ollama Multimodal integration
  4. */
  5. namespace Classifai\Providers\Localhost;
  6. use Classifai\Features\DescriptiveTextGenerator;
  7. use Classifai\Features\ImageTagsGenerator;
  8. use Classifai\Features\ImageTextExtraction;
  9. use Classifai\Providers\OpenAI\APIRequest;
  10. use WP_Error;
  11. use function Classifai\get_largest_size_and_dimensions_image_url;
  12. use function Classifai\get_modified_image_source_url;
  13. use function Classifai\computer_vision_max_filesize;
  14. use function Classifai\get_default_prompt;
  15. /**
  16. * Ollama Multimodal class
  17. */
  18. class OllamaMultimodal extends Ollama {
  19. /**
  20. * The Provider ID.
  21. */
  22. const ID = 'ollama_multimodal';
  23. /**
  24. * Connects to Ollama and retrieves supported models.
  25. *
  26. * @param array $args Overridable args.
  27. * @return array
  28. */
  29. public function get_models( array $args = [] ): array {
  30. $models = parent::get_models( $args );
  31. $supported_models = [
  32. 'llava',
  33. 'bakllava',
  34. 'llama3.2-vision',
  35. 'llava-llama3',
  36. 'moondream',
  37. 'minicpm-v',
  38. 'llava-phi3',
  39. ];
  40. // Ensure our model list only contains the ones we support.
  41. foreach ( $models as $key => $model ) {
  42. $model = explode( ':', $model );
  43. if ( ! in_array( $model[0], $supported_models, true ) ) {
  44. unset( $models[ $key ] );
  45. }
  46. }
  47. return $models;
  48. }
  49. /**
  50. * Returns the default settings for this provider.
  51. *
  52. * @return array
  53. */
  54. public function get_default_provider_settings(): array {
  55. $common_settings = parent::get_default_provider_settings();
  56. /**
  57. * Default values for feature specific settings.
  58. */
  59. switch ( $this->feature_instance::ID ) {
  60. case DescriptiveTextGenerator::ID:
  61. case ImageTagsGenerator::ID:
  62. case ImageTextExtraction::ID:
  63. $common_settings['prompt'] = [
  64. [
  65. 'title' => esc_html__( 'ClassifAI default', 'classifai' ),
  66. 'prompt' => $this->feature_instance->prompt,
  67. 'original' => 1,
  68. 'default' => 1,
  69. ],
  70. ];
  71. break;
  72. }
  73. return $common_settings;
  74. }
  75. /**
  76. * Generic request handler for multimodal LLMs run by Ollama.
  77. *
  78. * @param string $url Request URL.
  79. * @param array $body Request body.
  80. * @return string|WP_Error
  81. */
  82. public function request( string $url, array $body ) {
  83. // Make our API request.
  84. $request = new APIRequest( 'test' );
  85. $response = $request->post(
  86. $url,
  87. [
  88. 'body' => wp_json_encode( $body ),
  89. ]
  90. );
  91. // Return the error if we have one.
  92. if ( is_wp_error( $response ) ) {
  93. return $response;
  94. }
  95. // If we have a message, return it.
  96. $return = '';
  97. if ( isset( $response['response'] ) ) {
  98. $return = sanitize_text_field( trim( $response['response'], ' "\'' ) );
  99. }
  100. return $return;
  101. }
  102. /**
  103. * Generate descriptive alt text for an image.
  104. *
  105. * @param string $image_url URL of image to process.
  106. * @param int $attachment_id Post ID for the attachment.
  107. * @return string|WP_Error
  108. */
  109. public function generate_alt_text( string $image_url, int $attachment_id ) {
  110. if ( ! wp_attachment_is_image( $attachment_id ) ) {
  111. return new WP_Error( 'invalid', esc_html__( 'This attachment can\'t be processed.', 'classifai' ) );
  112. }
  113. $feature = new DescriptiveTextGenerator();
  114. $settings = $feature->get_settings( static::ID );
  115. // Ensure things are set up properly.
  116. if ( empty( $settings ) || ( isset( $settings[ static::ID ]['authenticated'] ) && false === $settings[ static::ID ]['authenticated'] ) || ( ! $feature->is_feature_enabled() ) ) {
  117. return new WP_Error( 'not_enabled', esc_html__( 'Descriptive text generation is disabled or Ollama authentication failed. Please check your settings.', 'classifai' ) );
  118. }
  119. // Download the image so we can encode it.
  120. $image_data = file_get_contents( $image_url ); // phpcs:ignore
  121. if ( false === $image_data || ! is_string( $image_data ) ) {
  122. return new WP_Error( 'invalid', esc_html__( 'Image cannot be downloaded.', 'classifai' ) );
  123. }
  124. /**
  125. * Filter the prompt we will send to Ollama.
  126. *
  127. * @since 3.3.0
  128. * @hook classifai_ollama_descriptive_text_prompt
  129. *
  130. * @param {string} $prompt Prompt we are sending to Ollama.
  131. * @param {int} $attachment_id ID of attachment.
  132. *
  133. * @return {string} Prompt.
  134. */
  135. $prompt = apply_filters( 'classifai_ollama_descriptive_text_prompt', get_default_prompt( $settings[ static::ID ]['prompt'] ?? [] ) ?? $feature->prompt, $attachment_id );
  136. /**
  137. * Filter the request body before sending to Ollama.
  138. *
  139. * @since 3.3.0
  140. * @hook classifai_ollama_descriptive_text_request_body
  141. *
  142. * @param {array} $body Request body that will be sent to Ollama.
  143. * @param {int} $attachment_id ID of attachment.
  144. *
  145. * @return {array} Request body.
  146. */
  147. $body = apply_filters(
  148. 'classifai_ollama_descriptive_text_request_body',
  149. [
  150. 'model' => $settings['model'] ?? '',
  151. 'prompt' => $prompt,
  152. 'images' => [ base64_encode( $image_data ) ], // phpcs:ignore WordPress.PHP.DiscouragedPHPFunctions.obfuscation_base64_encode
  153. 'stream' => false,
  154. ],
  155. $attachment_id
  156. );
  157. // Make our API request.
  158. $response = $this->request( $this->get_api_generate_url( $settings['endpoint_url'] ?? '' ), $body );
  159. return $response;
  160. }
  161. /**
  162. * Extract text out of an image.
  163. *
  164. * @param string $image_url URL of image to process.
  165. * @param int $attachment_id Post ID for the attachment.
  166. * @return string|WP_Error
  167. */
  168. public function ocr_processing( string $image_url, int $attachment_id ) {
  169. if ( ! wp_attachment_is_image( $attachment_id ) ) {
  170. return new WP_Error( 'invalid', esc_html__( 'This attachment can\'t be processed.', 'classifai' ) );
  171. }
  172. $feature = new ImageTextExtraction();
  173. $settings = $feature->get_settings( static::ID );
  174. // Ensure things are set up properly.
  175. if ( empty( $settings ) || ( isset( $settings[ static::ID ]['authenticated'] ) && false === $settings[ static::ID ]['authenticated'] ) || ( ! $feature->is_feature_enabled() ) ) {
  176. return new WP_Error( 'not_enabled', esc_html__( 'Image Text Extraction is disabled or Ollama authentication failed. Please check your settings.', 'classifai' ) );
  177. }
  178. // Download the image so we can encode it.
  179. $image_data = file_get_contents( $image_url ); // phpcs:ignore
  180. if ( false === $image_data || ! is_string( $image_data ) ) {
  181. return new WP_Error( 'invalid', esc_html__( 'Image cannot be downloaded.', 'classifai' ) );
  182. }
  183. /**
  184. * Filter the prompt we will send to Ollama.
  185. *
  186. * @since 3.3.0
  187. * @hook classifai_ollama_ocr_processing_prompt
  188. *
  189. * @param {string} $prompt Prompt we are sending to Ollama.
  190. * @param {int} $attachment_id ID of attachment.
  191. *
  192. * @return {string} Prompt.
  193. */
  194. $prompt = apply_filters( 'classifai_ollama_ocr_processing_prompt', get_default_prompt( $settings[ static::ID ]['prompt'] ?? [] ) ?? $feature->prompt, $attachment_id );
  195. /**
  196. * Filter the request body before sending to Ollama.
  197. *
  198. * @since 3.3.0
  199. * @hook classifai_ollama_ocr_processing_request_body
  200. *
  201. * @param {array} $body Request body that will be sent to Ollama.
  202. * @param {int} $attachment_id ID of attachment.
  203. *
  204. * @return {array} Request body.
  205. */
  206. $body = apply_filters(
  207. 'classifai_ollama_ocr_processing_request_body',
  208. [
  209. 'model' => $settings['model'] ?? '',
  210. 'prompt' => $prompt,
  211. 'images' => [ base64_encode( $image_data ) ], // phpcs:ignore WordPress.PHP.DiscouragedPHPFunctions.obfuscation_base64_encode
  212. 'stream' => false,
  213. ],
  214. $attachment_id
  215. );
  216. // Make our API request.
  217. $response = $this->request( $this->get_api_generate_url( $settings['endpoint_url'] ?? '' ), $body );
  218. // If no text was found, return an empty string.
  219. if ( 'none' === strtolower( $response ) ) {
  220. return '';
  221. }
  222. return $response;
  223. }
  224. /**
  225. * Generate tags for an image.
  226. *
  227. * @param string $image_url URL of image to process.
  228. * @param int $attachment_id Post ID for the attachment.
  229. * @return array|WP_Error
  230. */
  231. public function generate_image_tags( string $image_url, int $attachment_id ) {
  232. if ( ! wp_attachment_is_image( $attachment_id ) ) {
  233. return new WP_Error( 'invalid', esc_html__( 'This attachment can\'t be processed.', 'classifai' ) );
  234. }
  235. $feature = new ImageTagsGenerator();
  236. $settings = $feature->get_settings( static::ID );
  237. // Ensure things are set up properly.
  238. if ( empty( $settings ) || ( isset( $settings[ static::ID ]['authenticated'] ) && false === $settings[ static::ID ]['authenticated'] ) || ( ! $feature->is_feature_enabled() ) ) {
  239. return new WP_Error( 'not_enabled', esc_html__( 'Image tag generation is disabled or Ollama authentication failed. Please check your settings.', 'classifai' ) );
  240. }
  241. // Download the image so we can encode it.
  242. $image_data = file_get_contents( $image_url ); // phpcs:ignore
  243. if ( false === $image_data || ! is_string( $image_data ) ) {
  244. return new WP_Error( 'invalid', esc_html__( 'Image cannot be downloaded.', 'classifai' ) );
  245. }
  246. /**
  247. * Filter the prompt we will send to Ollama.
  248. *
  249. * @since 3.3.0
  250. * @hook classifai_ollama_image_tag_prompt
  251. *
  252. * @param {string} $prompt Prompt we are sending to Ollama.
  253. * @param {int} $attachment_id ID of attachment.
  254. *
  255. * @return {string} Prompt.
  256. */
  257. $prompt = apply_filters( 'classifai_ollama_image_tag_prompt', get_default_prompt( $settings[ static::ID ]['prompt'] ?? [] ) ?? $feature->prompt, $attachment_id );
  258. /**
  259. * Filter the request body before sending to Ollama.
  260. *
  261. * @since 3.3.0
  262. * @hook classifai_ollama_image_tag_request_body
  263. *
  264. * @param {array} $body Request body that will be sent to Ollama.
  265. * @param {int} $attachment_id ID of attachment.
  266. *
  267. * @return {array} Request body.
  268. */
  269. $body = apply_filters(
  270. 'classifai_ollama_image_tag_request_body',
  271. [
  272. 'model' => $settings['model'] ?? '',
  273. 'prompt' => $prompt,
  274. 'images' => [ base64_encode( $image_data ) ], // phpcs:ignore WordPress.PHP.DiscouragedPHPFunctions.obfuscation_base64_encode
  275. 'stream' => false,
  276. ],
  277. $attachment_id
  278. );
  279. // Make our API request.
  280. $response = $this->request( $this->get_api_generate_url( $settings['endpoint_url'] ?? '' ), $body );
  281. // If we have a response, clean it up.
  282. if ( ! is_wp_error( $response ) ) {
  283. $response = array_filter( explode( '- ', $response ) );
  284. $response = array_map( 'trim', $response );
  285. }
  286. // Ensure we have a valid response after processing.
  287. if ( ! is_array( $response ) || empty( $response ) ) {
  288. return new WP_Error( 'error', esc_html__( 'No tags found.', 'classifai' ) );
  289. }
  290. return $response;
  291. }
  292. /**
  293. * Common entry point for all REST endpoints for this provider.
  294. *
  295. * @param int $attachment_id The attachment ID we're processing.
  296. * @param string $route_to_call The name of the route we're going to be processing.
  297. * @param array $args Optional arguments to pass to the route.
  298. * @return array|string|WP_Error|null
  299. */
  300. public function rest_endpoint_callback( $attachment_id, string $route_to_call = '', array $args = [] ) {
  301. // Check to be sure the post both exists and is an attachment.
  302. if ( ! get_post( $attachment_id ) || 'attachment' !== get_post_type( $attachment_id ) ) {
  303. /* translators: %1$s: the attachment ID */
  304. return new WP_Error( 'incorrect_ID', sprintf( esc_html__( '%1$d is not found or is not an attachment', 'classifai' ), $attachment_id ), [ 'status' => 404 ] );
  305. }
  306. $metadata = wp_get_attachment_metadata( $attachment_id );
  307. if ( ! $metadata || ! is_array( $metadata ) ) {
  308. return new WP_Error( 'invalid', esc_html__( 'No valid metadata found.', 'classifai' ) );
  309. }
  310. $image_url = get_modified_image_source_url( $attachment_id );
  311. if ( empty( $image_url ) || ! filter_var( $image_url, FILTER_VALIDATE_URL ) ) {
  312. if ( isset( $metadata['sizes'] ) && is_array( $metadata['sizes'] ) ) {
  313. $image_url = get_largest_size_and_dimensions_image_url(
  314. get_attached_file( $attachment_id ),
  315. wp_get_attachment_url( $attachment_id ),
  316. $metadata,
  317. [ 50, 16000 ],
  318. [ 50, 16000 ],
  319. computer_vision_max_filesize()
  320. );
  321. } else {
  322. $image_url = wp_get_attachment_url( $attachment_id );
  323. }
  324. }
  325. if ( empty( $image_url ) ) {
  326. return new WP_Error( 'error', esc_html__( 'Image does not meet size requirements. Please ensure it is at least 50x50 but less than 16000x16000 and smaller than 20MB.', 'classifai' ) );
  327. }
  328. switch ( $route_to_call ) {
  329. case 'descriptive_text':
  330. return $this->generate_alt_text( $image_url, $attachment_id );
  331. case 'ocr':
  332. return $this->ocr_processing( $image_url, $attachment_id );
  333. case 'tags':
  334. return $this->generate_image_tags( $image_url, $attachment_id );
  335. }
  336. }
  337. }