-
Notifications
You must be signed in to change notification settings - Fork 9
[WIP] feat: filter files on mount #27
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,7 @@ | ||
| package service | ||
|
|
||
| import ( | ||
| "encoding/json" | ||
| "fmt" | ||
| "os" | ||
| "path/filepath" | ||
|
|
@@ -68,6 +69,24 @@ func (s *Service) localCreateVolume(ctx context.Context, req *csi.CreateVolumeRe | |
| } | ||
| } | ||
|
|
||
| excludeFilePatternsParam := strings.TrimSpace(parameters[s.cfg.Get().ParameterKeyExcludeFiles()]) | ||
| var excludeFilePatterns []string | ||
| if excludeFilePatternsParam != "" { | ||
| if err := json.Unmarshal([]byte(excludeFilePatternsParam), &excludeFilePatterns); err != nil { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What about using a comma-separated format for the parameter value? For example: |
||
| return nil, isStaticVolume, status.Errorf(codes.InvalidArgument, "invalid parameter:%s: must be valid JSON array: %v", s.cfg.Get().ParameterKeyExcludeFiles(), err) | ||
| } | ||
|
|
||
| // Validate patterns for security | ||
| for _, p := range excludeFilePatterns { | ||
| if strings.HasPrefix(p, "/") && len(p) > 1 { | ||
| return nil, isStaticVolume, status.Errorf(codes.InvalidArgument, "invalid parameter:%s: absolute paths not allowed: %s", s.cfg.Get().ParameterKeyExcludeFiles(), p) | ||
| } | ||
| if strings.Contains(p, "..") { | ||
| return nil, isStaticVolume, status.Errorf(codes.InvalidArgument, "invalid parameter:%s: parent directory reference not allowed: %s", s.cfg.Get().ParameterKeyExcludeFiles(), p) | ||
| } | ||
| } | ||
| } | ||
|
Comment on lines
+79
to
+88
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This validation logic for |
||
|
|
||
| parentSpan := trace.SpanFromContext(ctx) | ||
| parentSpan.SetAttributes(attribute.String("volume_name", volumeName)) | ||
| parentSpan.SetAttributes(attribute.String("reference", modelReference)) | ||
|
|
@@ -78,7 +97,7 @@ func (s *Service) localCreateVolume(ctx context.Context, req *csi.CreateVolumeRe | |
| startedAt := time.Now() | ||
| ctx, span := tracing.Tracer.Start(ctx, "PullModel") | ||
| span.SetAttributes(attribute.String("model_dir", modelDir)) | ||
| if err := s.worker.PullModel(ctx, isStaticVolume, volumeName, "", modelReference, modelDir, checkDiskQuota, excludeModelWeights); err != nil { | ||
| if err := s.worker.PullModel(ctx, isStaticVolume, volumeName, "", modelReference, modelDir, checkDiskQuota, excludeModelWeights, excludeFilePatterns); err != nil { | ||
| span.SetStatus(otelCodes.Error, "failed to pull model") | ||
| span.RecordError(err) | ||
| span.End() | ||
|
|
@@ -111,7 +130,7 @@ func (s *Service) localCreateVolume(ctx context.Context, req *csi.CreateVolumeRe | |
| startedAt := time.Now() | ||
| ctx, span := tracing.Tracer.Start(ctx, "PullModel") | ||
| span.SetAttributes(attribute.String("model_dir", modelDir)) | ||
| if err := s.worker.PullModel(ctx, isStaticVolume, volumeName, mountID, modelReference, modelDir, checkDiskQuota, excludeModelWeights); err != nil { | ||
| if err := s.worker.PullModel(ctx, isStaticVolume, volumeName, mountID, modelReference, modelDir, checkDiskQuota, excludeModelWeights, excludeFilePatterns); err != nil { | ||
| span.SetStatus(otelCodes.Error, "failed to pull model") | ||
| span.RecordError(err) | ||
| span.End() | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,7 @@ | ||
| package service | ||
|
|
||
| import ( | ||
| "encoding/json" | ||
| "errors" | ||
| "fmt" | ||
| "net/http" | ||
|
|
@@ -86,6 +87,31 @@ func (h *DynamicServerHandler) CreateVolume(c echo.Context) error { | |
| }) | ||
| } | ||
|
|
||
| // Validate exclude_file_patterns | ||
| for _, p := range req.ExcludeFilePatterns { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this path security check is unnecessary because, after using gitignore to match the results of |
||
| if strings.HasPrefix(p, "/") && len(p) > 1 { | ||
| return c.JSON(http.StatusBadRequest, ErrorResponse{ | ||
| Code: ERR_CODE_INVALID_ARGUMENT, | ||
| Message: fmt.Sprintf("exclude_file_patterns: absolute paths not allowed: %s", p), | ||
| }) | ||
| } | ||
| if strings.Contains(p, "..") { | ||
| return c.JSON(http.StatusBadRequest, ErrorResponse{ | ||
| Code: ERR_CODE_INVALID_ARGUMENT, | ||
| Message: fmt.Sprintf("exclude_file_patterns: parent directory reference not allowed: %s", p), | ||
| }) | ||
| } | ||
| } | ||
|
|
||
| excludeFilesJSON := "[]" | ||
| if len(req.ExcludeFilePatterns) > 0 { | ||
| jsonBytes, err := json.Marshal(req.ExcludeFilePatterns) | ||
| if err != nil { | ||
| return handleError(c, fmt.Errorf("marshal exclude_file_patterns: %w", err)) | ||
| } | ||
| excludeFilesJSON = string(jsonBytes) | ||
| } | ||
|
|
||
| _, err := h.svc.CreateVolume(c.Request().Context(), &csi.CreateVolumeRequest{ | ||
| Name: volumeName, | ||
| Parameters: map[string]string{ | ||
|
|
@@ -94,6 +120,7 @@ func (h *DynamicServerHandler) CreateVolume(c echo.Context) error { | |
| h.cfg.Get().ParameterKeyMountID(): req.MountID, | ||
| h.cfg.Get().ParameterKeyCheckDiskQuota(): strconv.FormatBool(req.CheckDiskQuota), | ||
| h.cfg.Get().ParameterKeyExcludeModelWeights(): strconv.FormatBool(req.ExcludeModelWeights), | ||
| h.cfg.Get().ParameterKeyExcludeFiles(): excludeFilesJSON, | ||
| }, | ||
| }) | ||
| if err != nil { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,7 @@ | ||
| package service | ||
|
|
||
| import ( | ||
| "encoding/json" | ||
| "path/filepath" | ||
| "strconv" | ||
| "strings" | ||
|
|
@@ -104,8 +105,24 @@ func (s *Service) nodePublishVolume( | |
| } | ||
| } | ||
|
|
||
| excludeFilePatternsParam := volumeAttributes[s.cfg.Get().ParameterKeyExcludeFiles()] | ||
| var excludeFilePatterns []string | ||
| if excludeFilePatternsParam != "" { | ||
| if err := json.Unmarshal([]byte(excludeFilePatternsParam), &excludeFilePatterns); err != nil { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ditto. |
||
| return nil, isStaticVolume, status.Errorf(codes.InvalidArgument, "invalid parameter:%s: must be valid JSON array: %v", s.cfg.Get().ParameterKeyExcludeFiles(), err) | ||
| } | ||
| for _, p := range excludeFilePatterns { | ||
| if strings.HasPrefix(p, "/") && len(p) > 1 { | ||
| return nil, isStaticVolume, status.Errorf(codes.InvalidArgument, "invalid parameter:%s: absolute paths not allowed: %s", s.cfg.Get().ParameterKeyExcludeFiles(), p) | ||
| } | ||
| if strings.Contains(p, "..") { | ||
| return nil, isStaticVolume, status.Errorf(codes.InvalidArgument, "invalid parameter:%s: parent directory reference not allowed: %s", s.cfg.Get().ParameterKeyExcludeFiles(), p) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| logger.WithContext(ctx).Infof("publishing static inline volume: %s", staticInlineModelReference) | ||
| resp, err := s.nodePublishVolumeStaticInlineVolume(ctx, volumeID, targetPath, staticInlineModelReference, excludeModelWeights) | ||
| resp, err := s.nodePublishVolumeStaticInlineVolume(ctx, volumeID, targetPath, staticInlineModelReference, excludeModelWeights, excludeFilePatterns) | ||
| return resp, isStaticVolume, err | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,173 @@ | ||
| package service | ||
|
|
||
import (
	"io"
	"os"
	"path/filepath"
	"sort"
	"strings"

	gitignore "github.com/go-git/go-git/v5/plumbing/format/gitignore"
	"github.com/modelpack/model-csi-driver/pkg/logger"
	"github.com/pkg/errors"
)
|
|
||
// FilePatternMatcher wraps gitignore pattern matching functionality.
// It pairs a compiled gitignore matcher with the raw pattern strings the
// matcher was built from.
type FilePatternMatcher struct {
	// matcher is the gitignore matcher consulted by Match.
	matcher gitignore.Matcher
	// patterns holds the original pattern strings; Excludes reports whether
	// this slice is non-empty.
	patterns []string
}
|
|
||
| // NewFilePatternMatcher creates a new pattern matcher from a list of gitignore-compatible patterns | ||
| func NewFilePatternMatcher(patterns []string) (*FilePatternMatcher, error) { | ||
| // Validate patterns for security | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ditto. |
||
| for _, p := range patterns { | ||
| // Check for absolute paths (starts with / and has more characters) | ||
| if strings.HasPrefix(p, "/") && len(p) > 1 { | ||
| return nil, errors.Errorf("absolute path patterns are not allowed: %s", p) | ||
| } | ||
| if strings.Contains(p, "..") { | ||
| return nil, errors.Errorf("parent directory reference is not allowed: %s", p) | ||
| } | ||
| } | ||
|
|
||
| // Create gitignore matcher from patterns | ||
| // Parse each string pattern into gitignore.Pattern | ||
| var gitPatterns []gitignore.Pattern | ||
| for _, p := range patterns { | ||
| gitPatterns = append(gitPatterns, gitignore.ParsePattern(p, nil)) | ||
| } | ||
| matcher := gitignore.NewMatcher(gitPatterns) | ||
|
|
||
| return &FilePatternMatcher{ | ||
| matcher: matcher, | ||
| patterns: patterns, | ||
| }, nil | ||
| } | ||
|
|
||
| // Match returns true if the given path matches any of the exclusion patterns | ||
| func (m *FilePatternMatcher) Match(path string) bool { | ||
| // gitignore matcher expects paths in forward-slash format | ||
| // and uses a slice of strings for path components | ||
| path = filepath.ToSlash(path) | ||
| pathParts := strings.Split(path, "/") | ||
| isDir := strings.HasSuffix(path, "/") | ||
| return m.matcher.Match(pathParts, isDir) | ||
| } | ||
|
|
||
| // Excludes returns true if any exclusion patterns are defined | ||
| func (m *FilePatternMatcher) Excludes() bool { | ||
| return len(m.patterns) > 0 | ||
| } | ||
|
|
||
| // filterFilesByPatterns walks the target directory and removes files matching the exclusion patterns | ||
| // Returns a list of excluded file paths (relative to targetDir) | ||
| func filterFilesByPatterns(targetDir string, matcher *FilePatternMatcher) ([]string, error) { | ||
| excludedFiles := []string{} | ||
|
|
||
| // First pass: identify and remove matched files | ||
| err := filepath.Walk(targetDir, func(path string, info os.FileInfo, err error) error { | ||
| if err != nil { | ||
| return err | ||
| } | ||
|
|
||
| // Skip the target directory itself | ||
| if path == targetDir { | ||
| return nil | ||
| } | ||
|
|
||
| // Get relative path for pattern matching | ||
| relPath, err := filepath.Rel(targetDir, path) | ||
| if err != nil { | ||
| return errors.Wrap(err, "get relative path") | ||
| } | ||
|
|
||
| // Check if file/directory matches exclusion pattern | ||
| if matcher.Match(relPath) { | ||
| if !info.IsDir() { | ||
| logger.Logger().Infof("Excluding file: %s", relPath) | ||
| excludedFiles = append(excludedFiles, relPath) | ||
|
|
||
| // Remove the file | ||
| if err := os.Remove(path); err != nil { | ||
| return errors.Wrapf(err, "remove excluded file: %s", relPath) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| return nil | ||
| }) | ||
|
|
||
| if err != nil { | ||
| return nil, errors.Wrap(err, "walk directory for pattern matching") | ||
| } | ||
|
|
||
| // Second pass: remove empty directories | ||
| removeEmptyDirectories(targetDir, matcher) | ||
|
|
||
| // Sort excluded files for consistent logging | ||
| sort.Strings(excludedFiles) | ||
|
|
||
| logger.Logger().Infof("Excluded %d file(s) matching patterns", len(excludedFiles)) | ||
|
|
||
| return excludedFiles, nil | ||
| } | ||
|
|
||
| // removeEmptyDirectories removes empty directories that were created after file removal | ||
| func removeEmptyDirectories(targetDir string, matcher *FilePatternMatcher) { | ||
| dirsToRemove := []string{} | ||
|
|
||
| // First, find all empty directories | ||
| err := filepath.Walk(targetDir, func(path string, info os.FileInfo, err error) error { | ||
| if err != nil { | ||
| return nil // Continue on error | ||
| } | ||
|
|
||
| if info.IsDir() && path != targetDir { | ||
| isEmpty, _ := isDirEmpty(path) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The error returned from isEmpty, err := isDirEmpty(path)
if err != nil {
logger.Logger().WithError(err).Warnf("Failed to check if directory is empty: %s", path)
return nil
}
if isEmpty { |
||
| if isEmpty { | ||
| dirsToRemove = append(dirsToRemove, path) | ||
| } | ||
| } | ||
|
|
||
| return nil | ||
| }) | ||
|
|
||
| if err != nil { | ||
| logger.Logger().WithError(err).Warn("Failed to walk directories for cleanup") | ||
| return | ||
| } | ||
|
|
||
| // Remove empty directories in reverse order (deepest first) | ||
| for i := len(dirsToRemove) - 1; i >= 0; i-- { | ||
| dir := dirsToRemove[i] | ||
| if err := os.Remove(dir); err != nil { | ||
| logger.Logger().WithError(err).Warnf("Failed to remove empty directory: %s", dir) | ||
| } else { | ||
| relPath, _ := filepath.Rel(targetDir, dir) | ||
| logger.Logger().Infof("Removed empty directory: %s", relPath) | ||
| } | ||
| } | ||
| } | ||
|
|
||
// isDirEmpty reports whether the directory dir contains no entries.
//
// It returns (true, nil) for an empty directory, (false, nil) when at least
// one entry exists, and (false, err) when the directory cannot be opened or
// read.
func isDirEmpty(dir string) (bool, error) {
	f, err := os.Open(dir)
	if err != nil {
		return false, err
	}
	// The handle is read-only, so a Close failure cannot lose data and is
	// deliberately ignored.
	defer f.Close()

	// Requesting a single name is enough to tell empty from non-empty.
	_, err = f.Readdirnames(1)
	switch {
	case err == nil:
		return false, nil // Directory is not empty
	case err == io.EOF:
		// Readdirnames signals the end of the directory with the io.EOF
		// sentinel; compare against it rather than the error's text.
		return true, nil // Directory is empty
	default:
		return false, err // Error reading directory
	}
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe
return cfg.ServiceName + "/exclude-file-patterns".