ElasticSearch のクラスを作ってみた

ショコラ
ショコラ

ElasticSearch のクラスを作ってみた

きっと調べれば出てくると思うけど、理解するために作っちゃいました。

もっさん先輩
もっさん先輩

コメントもバグってます。←修正済み。
①データをベクトル化するのに AWS Bedrock の Titan(amazon.titan-embed-image-v1)を使用しました。
(お金がかからずベクトル化ってできるんですかね?といってもタイタンは物凄く安価ですけど)
②あまりに大きい画像を送るとエラーになってしまったので、1000 x 1000 ピクセルにリサイズしてから送るようにしました。

<?php

namespace App\Libs;

use Aws\Sdk;
use Gemini\Data\Blob;
use Gemini\Enums\MimeType;

/**
 * Small HTTP client for an Elasticsearch node, plus helpers that turn text/images into
 * embedding vectors through the AWS Bedrock runtime (Titan multimodal embedding model).
 *
 * Host, port, timeout and AWS credentials are read from the `elasticsearch.*` config keys
 * when the object is constructed. Every Elasticsearch call goes through request(), which
 * returns the decoded JSON response, or null when the transport fails or the body is not JSON.
 */
class ElasticSearch
{
  // cURL timeout in seconds applied to every Elasticsearch request (config default: 10).
  public $timeout;
  // Elasticsearch host name and port used to build "http://host:port/..." URLs.
  public $els_host;
  public $els_port;
  // AWS region and credentials handed to the AWS SDK when creating the Bedrock runtime client.
  public $aws_region;
  public $aws_key;
  public $aws_secret;

  /**
   * Load connection settings from the `elasticsearch` config namespace.
   */
  public function __construct()
  {
    $this->timeout    = config('elasticsearch.timeout',10);
    $this->els_host   = config('elasticsearch.host');
    $this->els_port   = config('elasticsearch.port');
    $this->aws_region = config('elasticsearch.aws_region');
    $this->aws_key    = config('elasticsearch.aws_key');
    $this->aws_secret = config('elasticsearch.aws_secret');
  }

  /**
   * Normalize an image input into an Image object squared to $size (default 1000).
   *
   * An existing Image instance is returned untouched (it is NOT resized again).
   *
   * @param mixed $inputImage Image instance, or whatever the Image constructor accepts.
   * @param int   $size       Edge length passed to resize_square().
   * @return Image|null null when invoking the Image object reports failure
   *                    (presumably "could not be loaded" — confirm against Image::__invoke).
   */
  public static function image( $inputImage, $size = 1000 )
  {
    if ( $inputImage instanceof Image ) {
      return $inputImage;
    }

    $img = new Image( $inputImage );
    if ( !$img() ) {
      return null;
    }

    // Only EXIF orientation 6 is compensated for; any other value leaves the image unrotated.
    $rotate = (6 == ($img->meta_data['Orientation'] ?? 0)) ? 270 : 0;
    return $img->set_bgcolor_white()
      ->resize_square( $size )
      ->rotate( $rotate );
  }

  /**
   * Build a Gemini Blob (JPEG mime type, base64 payload) from an image input.
   *
   * @param mixed $inputImage See image().
   * @param int   $size       See image().
   * @return Blob|null null when image() could not produce an Image.
   */
  public static function blobImage( $inputImage, $size = 1000 )
  {
    $img = self::image( $inputImage, $size );
    return $img ? new Blob(mimeType:MimeType::IMAGE_JPEG, data:$img->base64()) : null;
  }

  /**
   * Create a Bedrock runtime client from the configured region and credentials.
   */
  public function createBedrockRuntime()
  {
    $sdk = new Sdk([
      'region' => $this->aws_region,
      'credentials' => [
        'key'    => $this->aws_key,
        'secret' => $this->aws_secret,
      ],
    ]);
    return $sdk->createBedrockRuntime();
  }

  /**
   * Invoke the Titan embedding model with text and/or an image and return the decoded response.
   *
   * Error handling is the caller's job: SDK exceptions propagate unchanged.
   *
   * @param string|null $inputText  Text to embed; skipped when falsy.
   * @param mixed       $inputImage Image input (see image()); skipped when falsy.
   * @param string      $modelId    Bedrock model id.
   * @return mixed Decoded JSON body of the model response (associative array on success).
   * @throws \InvalidArgumentException when $inputImage is given but cannot be loaded as an image.
   */
  public function invokeModelTitanEmbedImage( $inputText = null, $inputImage = null, $modelId = 'amazon.titan-embed-image-v1' )
  {
    $body = [];
    if ( $inputText  ) {
      $body['inputText']  = $inputText;
    }
    if ( $inputImage ) {
      $img = static::image( $inputImage );
      if ( null === $img ) {
        // Previously this dereferenced null and raised a fatal \Error that callers
        // catching \Exception (e.g. knnSearchByMultimodal) could not handle.
        throw new \InvalidArgumentException('inputImage could not be loaded as an image');
      }
      $body['inputImage'] = $img->base64();
    }
    $result = $this->createBedrockRuntime()->invokeModel([
      'modelId'     => $modelId,
      'contentType' => 'application/json',
      'accept'      => 'application/json',
      'body'        => json_encode($body),
    ]);
    // The response body is a stream object; cast explicitly instead of relying on coercion.
    return json_decode( (string) $result['body'], true );
  }

  /**
   * Send one HTTP request to the configured Elasticsearch node and decode the JSON reply.
   *
   * @param string      $method HTTP verb ('GET', 'POST', 'PUT', 'DELETE', ...).
   * @param string      $path   Path (and optional query string) relative to "http://host:port/".
   * @param string|null $body   Request body; when given it is sent with a JSON content type.
   * @return mixed Decoded response, or null on transport failure / non-JSON body.
   */
  private function request( $method, $path, $body = null )
  {
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, "http://{$this->els_host}:{$this->els_port}/{$path}");
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); // return the response as a string
    curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);

    if ( 'GET' === $method ) {
      curl_setopt($ch, CURLOPT_HTTPGET, true);
    } elseif ( 'POST' === $method ) {
      curl_setopt($ch, CURLOPT_POST, true);
    } else {
      curl_setopt($ch, CURLOPT_CUSTOMREQUEST, $method);
    }

    if ( null !== $body ) {
      curl_setopt($ch, CURLOPT_HTTPHEADER, ['Content-Type: application/json']);
      curl_setopt($ch, CURLOPT_POSTFIELDS, $body);
    }

    $json = curl_exec($ch);
    if ( false === $json ) {
      // Transport-level failure (timeout, refused connection, ...): keep the historical
      // "null result" contract but leave a trace instead of failing silently.
      \Log::warning( "ElasticSearch {$method} {$path} failed: ".curl_error($ch) );
      return null;
    }
    return json_decode($json, true);
  }

  /**
   * Call the _bulk endpoint.
   *
   * @param array $request List of action/document rows; each row is JSON-encoded onto its own line.
   * @return mixed Decoded response or null.
   */
  public function bulk( $request )
  {
    // The bulk body is newline-delimited JSON and must end with a newline.
    $ndjson = implode("\n",array_map('json_encode',$request))."\n";
    return $this->request('POST', '_bulk', $ndjson);
  }

  /**
   * Index (create or replace) a document.
   *
   * @param string     $table   Index name.
   * @param string|int $id      Document id; URL-encoded here, so pass it unencoded.
   * @param mixed      $request Document body (JSON-encoded as-is).
   * @return mixed Decoded response or null.
   */
  public function post( $table, $id, $request )
  {
    $payload = json_encode( $request, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES );
    return $this->request('POST', "{$table}/_doc/".rawurlencode((string) $id)."?pretty", $payload);
  }

  /**
   * Send a PUT to the index root (e.g. to create the index with settings/mappings).
   *
   * @param string $table   Index name.
   * @param mixed  $request Request body (JSON-encoded as-is).
   * @return mixed Decoded response or null.
   */
  public function put( $table, $request )
  {
    $payload = json_encode( $request, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES );
    return $this->request('PUT', $table, $payload);
  }

  /**
   * Fetch a single document.
   *
   * @param string     $table Index name.
   * @param string|int $id    Document id; URL-encoded here, so pass it unencoded.
   * @return mixed|null Decoded response, or null when the response carries an `error`
   *                    key, reports `found: false`, or the request failed.
   */
  public function get( $table, $id )
  {
    $result = $this->request('GET', "{$table}/_doc/".rawurlencode((string) $id));
    if ( isset($result['error']) ) {
      return null;
    }
    if ( isset($result['found']) && !$result['found'] ) {
      return null;
    }
    return $result;
  }

  /**
   * Run a match_all search and return the hits.
   *
   * @param string $table Index name.
   * @param int    $size  Maximum number of hits requested.
   * @return array `hits.hits` of the response, or [] when absent.
   */
  public function getAll( $table, $size = 999 )
  {
    $payload = json_encode([
      'query'=> ['match_all'=> new \stdClass],
      'size'=> $size,
    ]);
    // Sent as POST: a request carrying a body was already going out as POST via cURL.
    $result = $this->request('POST', "{$table}/_search", $payload);
    return $result['hits']['hits'] ?? [];
  }

  /**
   * Delete a single document.
   *
   * @param string     $table Index name.
   * @param string|int $id    Document id; URL-encoded here, so pass it unencoded.
   * @return mixed Decoded response or null.
   */
  public function del( $table, $id )
  {
    return $this->request('DELETE', "{$table}/_doc/".rawurlencode((string) $id));
  }

  /**
   * Send DELETE to the index root, i.e. drop the whole index (not just its documents).
   *
   * @param string $table Index name.
   * @return mixed Decoded response or null.
   */
  public function delAll( $table )
  {
    return $this->request('DELETE', $table);
  }

  /**
   * Fetch the mapping of an index.
   *
   * @param string $table Index name.
   * @return mixed Decoded response or null.
   */
  public function mapping( $table )
  {
    return $this->request('GET', "{$table}/_mapping?pretty");
  }

  /**
   * Run a search request and return the hits.
   *
   * The name is a historical misspelling of "search"; it is kept because callers
   * depend on it. search() is the correctly spelled equivalent.
   *
   * @param string $table   Index name.
   * @param mixed  $request Search body (JSON-encoded as-is).
   * @return array `hits.hits` of the response, or [] when absent.
   */
  public function seach( $table, $request )
  {
    $payload = json_encode( $request, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES );
    // Sent as POST: a request carrying a body was already going out as POST via cURL.
    $result = $this->request('POST', "{$table}/_search", $payload);
    return $result['hits']['hits'] ?? [];
  }

  /**
   * Correctly spelled alias of seach().
   *
   * @param string $table   Index name.
   * @param mixed  $request Search body.
   * @return array Hits, or [].
   */
  public function search( $table, $request )
  {
    return $this->seach($table,$request);
  }

  /**
   * Exact-match search on the `.keyword` sub-field of one field.
   *
   * @param string $table Index name.
   * @param string $key   Field name (".keyword" is appended).
   * @param mixed  $value Value to match exactly.
   * @return array Hits, or [].
   */
  public function search_term( $table, $key, $value )
  {
    $request = [
      'query'=> ['term'=> [$key.'.keyword'=> $value]],
    ];
    return $this->seach($table,$request);
  }

  /**
   * Exact-match search on several fields at once (all conditions must hold).
   *
   * @param string $table Index name.
   * @param array  $keys  Map of field name => value; ".keyword" is appended to each field.
   * @return array Hits, or [].
   */
  public function search_keys( $table, $keys )
  {
    $must = [];
    foreach ( $keys as $key => $keyword ) {
      $must[] = ['term'=> [$key.'.keyword'=>$keyword]];
    }
    $request = ['query'=> ['bool'=> ['must'=> $must]]];
    return $this->seach($table,$request);
  }

  /**
   * Full-text `match` search on one field using the "and" operator.
   *
   * @param string $table Index name.
   * @param string $key   Field name.
   * @param string $query Query text.
   * @return array Hits, or [].
   */
  public function search_match( $table, $key, $query )
  {
    $request = [
      'query'=> ['match'=> [$key => ['query'=> $query,'operator'=> 'and']]],
    ];
    return $this->seach($table,$request);
  }

  /**
   * Call the _knn_search endpoint and return the hits.
   *
   * NOTE(review): _knn_search is reported as deprecated in recent Elasticsearch
   * releases in favour of the `knn` option of _search — confirm against the target version.
   *
   * @param string $table   Index name.
   * @param mixed  $request kNN search body (JSON-encoded as-is).
   * @return array `hits.hits` of the response, or [] when absent.
   */
  public function knnSearch( $table, $request )
  {
    $payload = json_encode( $request, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES );
    // Sent as POST: a request carrying a body was already going out as POST via cURL.
    $result = $this->request('POST', "{$table}/_knn_search?pretty", $payload);
    return $result['hits']['hits'] ?? [];
  }

  /**
   * kNN search against the `vector` field using a ready-made query vector.
   *
   * @param string $table          Index name.
   * @param array  $query_vector   Query embedding.
   * @param int    $k              Number of hits to return.
   * @param int    $num_candidates Value passed through as `num_candidates`.
   * @return array Hits, or [].
   */
  public function knnSearchByVector( $table, $query_vector, $k = 10, $num_candidates = 100 )
  {
    return $this->knnSearch($table,[
      'knn'=> [
        'field'         => 'vector',
        'query_vector'  => $query_vector,
        'k'             => $k, // number of hits to return
        'num_candidates'=> $num_candidates,
      ]
    ]);
  }

  /**
   * Embed text and/or an image with Titan, then run a kNN search with the resulting vector.
   *
   * Best-effort: any exception is logged at debug level and an empty array is returned.
   *
   * @param string      $table          Index name.
   * @param string|null $inputText      Text to embed.
   * @param mixed       $inputImage     Image to embed (see image()).
   * @param int         $k              Number of hits to return.
   * @param int         $num_candidates Value passed through as `num_candidates`.
   * @return array Hits, or [] on any failure or when the model returns no `embedding`.
   */
  public function knnSearchByMultimodal( $table, $inputText = null, $inputImage = null, $k = 10, $num_candidates = 100 )
  {
    $result = [];
    try {
      $vec = $this->invokeModelTitanEmbedImage( $inputText, $inputImage );
      $result = isset($vec['embedding']) ? $this->knnSearchByVector($table,$vec['embedding'],$k,$num_candidates) : [];
    }
    catch (\Exception $e) {
      \Log::debug( $e->getMessage() );
    }
    return $result;
  }
}

以上

Scroll to Top