diff --git a/storage-service/src/go.mod b/storage-service/src/go.mod index 3b2e54c..705ed13 100644 --- a/storage-service/src/go.mod +++ b/storage-service/src/go.mod @@ -11,6 +11,7 @@ require ( ) require ( + github.com/adrg/strutil v0.3.1 // indirect github.com/dustin/go-humanize v1.0.1 // indirect github.com/elastic/elastic-transport-go/v8 v8.6.0 // indirect github.com/gabriel-vasile/mimetype v1.4.8 // indirect diff --git a/storage-service/src/go.sum b/storage-service/src/go.sum index 4369c92..98f4ab2 100644 --- a/storage-service/src/go.sum +++ b/storage-service/src/go.sum @@ -1,3 +1,5 @@ +github.com/adrg/strutil v0.3.1 h1:OLvSS7CSJO8lBii4YmBt8jiK9QOtB9CzCzwl4Ic/Fz4= +github.com/adrg/strutil v0.3.1/go.mod h1:8h90y18QLrs11IBffcGX3NW/GFBXCMcNg4M7H6MspPA= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= diff --git a/storage-service/src/service/ApiHandlerService.go b/storage-service/src/service/ApiHandlerService.go index 2b47d66..aacae29 100644 --- a/storage-service/src/service/ApiHandlerService.go +++ b/storage-service/src/service/ApiHandlerService.go @@ -4,10 +4,12 @@ import ( "context" "errors" "fmt" + "github.com/adrg/strutil/metrics" "log/slog" "strings" "time" + "github.com/adrg/strutil" "github.com/gdexlab/go-render/render" "github.com/go-playground/validator/v10" "github.com/google/uuid" @@ -74,22 +76,36 @@ func (a *ApiHandler) CreateMeme( return nil, fmt.Errorf("failed to do ocr : %w", err) } + ocrTextResult := ocrResult.OcrText slog.Info("CreateMeme: ocr result", commonconst.ACCOUNTID_LOG, request.AccountId, "id", idUuid, - "ocrText", ocrResult.OcrText) + "ocrText", ocrTextResult) contentDuplicate, err := a.findContentDuplicates(ctx, ocrResult) if err != nil { return nil, err } - if contentDuplicate != nil { - return a.HandleDuplicate(ctx, server.DuplicateImage, contentDuplicate, request) + if strings.TrimSpace(ocrTextResult) == "" { + return nil, errors.New("no text on image") } - if strings.TrimSpace(ocrResult.OcrText) == "" { - return nil, errors.New("no text on image") + if contentDuplicate != nil { + contentDuplicateTextResult := contentDuplicate.Result + similarity := strutil.Similarity(ocrTextResult, contentDuplicateTextResult, metrics.NewLevenshtein()) + + slog.Info("CreateMeme: found content-duplicate by embedding search", + commonconst.ACCOUNTID_LOG, request.AccountId, + "id", idUuid, + "dupId", contentDuplicate.ImageId, + "ocrText", ocrTextResult, + "dupOcrText", contentDuplicate.Result, + "similarity", similarity) + + if similarity > 0.5 { + return a.HandleDuplicate(ctx, server.DuplicateImage, contentDuplicate, request) + } } err = a.imageStorage.Save(ctx, idUuid, ocrResult.Image, ocrResult.Thumbnail.Image)