feat: add word count support for read operations

Extended word count functionality to read operations (read_note, stat, list) to complement existing write operation support.

Changes:
- read_note: Now automatically includes wordCount when returning content (with withContent or parseFrontmatter options)
- stat: Added optional includeWordCount parameter with performance warning
- list: Added optional includeWordCount parameter with performance warning
- All operations use the same word counting rules (excludes frontmatter and Obsidian comments)
- Best-effort error handling for batch operations

Technical details:
- Updated ParsedNote and FileMetadata type definitions to include optional wordCount field
- Added comprehensive test coverage (18 new tests)
- Updated tool descriptions with usage notes and performance warnings
- Updated CHANGELOG.md to document new features in version 1.1.0
2025-10-30 10:46:16 -04:00
parent c2002b0cdb
commit f8c7b6d53f
7 changed files with 380 additions and 12 deletions
@@ -27,7 +27,7 @@ export class ToolRegistry {
 		return [
 			{
 				name: "read_note",
-				description: "Read the content of a file from the Obsidian vault with optional frontmatter parsing. Use this to read the contents of a specific note or file. Path must be vault-relative (no leading slash) and include the file extension. Use list() first if you're unsure of the exact path. This only works on files, not folders. By default returns raw content. Set parseFrontmatter to true to get structured data with separated frontmatter and content.",
+				description: "Read the content of a file from the Obsidian vault with optional frontmatter parsing. Returns word count (excluding frontmatter and Obsidian comments) when content is included in the response. Use this to read the contents of a specific note or file. Path must be vault-relative (no leading slash) and include the file extension. Use list() first if you're unsure of the exact path. This only works on files, not folders. By default returns raw content with word count. Set parseFrontmatter to true to get structured data with separated frontmatter, content, and word count.",
 				inputSchema: {
 					type: "object",
 					properties: {
@@ -289,7 +289,7 @@ export class ToolRegistry {
 			},
 			{
 				name: "list",
-				description: "List files and/or directories with advanced filtering, recursion, and pagination. Returns structured JSON with file/directory metadata and optional frontmatter summaries. Supports glob patterns for includes/excludes, recursive traversal, type filtering, and cursor-based pagination. Use this to explore vault structure with fine-grained control.",
+				description: "List files and/or directories with advanced filtering, recursion, and pagination. Returns structured JSON with file/directory metadata and optional frontmatter summaries. Optional: includeWordCount (boolean) - If true, read each file's content and compute word count (excluding frontmatter and Obsidian comments). WARNING: This can be very slow for large directories or recursive listings, as it reads every file. Files that cannot be read are skipped (best effort). Only computed for files, not directories. Supports glob patterns for includes/excludes, recursive traversal, type filtering, and cursor-based pagination. Use this to explore vault structure with fine-grained control.",
 				inputSchema: {
 					type: "object",
 					properties: {
@@ -327,19 +327,27 @@ export class ToolRegistry {
 						withFrontmatterSummary: {
 							type: "boolean",
 							description: "If true, include parsed frontmatter (title, tags, aliases) for markdown files without reading full content. Default: false."
+						},
+						includeWordCount: {
+							type: "boolean",
+							description: "If true, read each file's content and compute word count. WARNING: Can be very slow for large directories or recursive listings. Only applies to files. Default: false"
 						}
 					}
 				}
 			},
 			{
 				name: "stat",
-				description: "Get detailed metadata for a file or folder at a specific path. Returns existence status, kind (file or directory), and full metadata including size, dates, etc. Use this to check if a path exists and get its properties. More detailed than exists() but slightly slower. Returns structured JSON with path, exists boolean, kind, and metadata object.",
+				description: "Get detailed metadata for a file or folder at a specific path. Returns existence status, kind (file or directory), and full metadata including size, dates, etc. Optional: includeWordCount (boolean) - If true, read file content and compute word count (excluding frontmatter and Obsidian comments). WARNING: This requires reading the entire file and is significantly slower than metadata-only stat. Only works for files, not directories. Use this to check if a path exists and get its properties. More detailed than exists() but slightly slower. Returns structured JSON with path, exists boolean, kind, and metadata object.",
 				inputSchema: {
 					type: "object",
 					properties: {
 						path: {
 							type: "string",
 							description: "Vault-relative path to check (e.g., 'folder/note.md' or 'projects'). Can be a file or folder. Paths are case-sensitive on macOS/Linux. Do not use leading or trailing slashes."
+						},
+						includeWordCount: {
+							type: "boolean",
+							description: "If true, read file content and compute word count. WARNING: Significantly slower than metadata-only stat. Only applies to files. Default: false"
 						}
 					},
 					required: ["path"]
@@ -561,11 +569,12 @@ export class ToolRegistry {
 						only: args.only,
 						limit: args.limit,
 						cursor: args.cursor,
-						withFrontmatterSummary: args.withFrontmatterSummary
+						withFrontmatterSummary: args.withFrontmatterSummary,
+						includeWordCount: args.includeWordCount
 					});
 					break;
 				case "stat":
-					result = await this.vaultTools.stat(args.path);
+					result = await this.vaultTools.stat(args.path, args.includeWordCount);
 					break;
 				case "exists":
 					result = await this.vaultTools.exists(args.path);
@@ -82,6 +82,17 @@ export class NoteTools {

 			// If no special options, return simple content
 			if (!parseFrontmatter) {
+				// Compute word count when returning content
+				if (withContent) {
+					const wordCount = ContentUtils.countWords(content);
+					const result = {
+						content,
+						wordCount
+					};
+					return {
+						content: [{ type: "text", text: JSON.stringify(result, null, 2) }]
+					};
+				}
 				return {
 					content: [{ type: "text", text: content }]
 				};
@@ -110,6 +121,11 @@ export class NoteTools {
 				result.contentWithoutFrontmatter = extracted.contentWithoutFrontmatter;
 			}

+			// Add word count when content is included
+			if (withContent) {
+				result.wordCount = ContentUtils.countWords(content);
+			}
+
 			return {
 				content: [{ type: "text", text: JSON.stringify(result, null, 2) }]
 			};
@@ -6,6 +6,7 @@ import { GlobUtils } from '../utils/glob-utils';
 import { SearchUtils } from '../utils/search-utils';
 import { WaypointUtils } from '../utils/waypoint-utils';
 import { LinkUtils } from '../utils/link-utils';
+import { ContentUtils } from '../utils/content-utils';
 import { IVaultAdapter, IMetadataCacheAdapter } from '../adapters/interfaces';

 export class VaultTools {
@@ -145,6 +146,7 @@ export class VaultTools {
 		limit?: number;
 		cursor?: string;
 		withFrontmatterSummary?: boolean;
+		includeWordCount?: boolean;
 	}): Promise<CallToolResult> {
 		const {
 			path,
@@ -154,7 +156,8 @@ export class VaultTools {
 			only = 'any',
 			limit,
 			cursor,
-			withFrontmatterSummary = false
+			withFrontmatterSummary = false,
+			includeWordCount = false
 		} = options;

 		let items: Array<FileMetadataWithFrontmatter | DirectoryMetadata> = [];
@@ -201,7 +204,7 @@ export class VaultTools {
 		}

 		// Collect items based on recursive flag
-		await this.collectItems(targetFolder, items, recursive, includes, excludes, only, withFrontmatterSummary);
+		await this.collectItems(targetFolder, items, recursive, includes, excludes, only, withFrontmatterSummary, includeWordCount);

 		// Sort: directories first, then files, alphabetically within each group
 		items.sort((a, b) => {
@@ -259,7 +262,8 @@ export class VaultTools {
 		includes?: string[],
 		excludes?: string[],
 		only?: 'files' | 'directories' | 'any',
-		withFrontmatterSummary?: boolean
+		withFrontmatterSummary?: boolean,
+		includeWordCount?: boolean
 	): Promise<void> {
 		for (const item of folder.children) {
 			// Skip the vault root itself
@@ -276,6 +280,18 @@ export class VaultTools {
 			if (item instanceof TFile) {
 				if (only !== 'directories') {
 					const fileMetadata = await this.createFileMetadataWithFrontmatter(item, withFrontmatterSummary || false);
+
+					// Optionally include word count (best effort)
+					if (includeWordCount) {
+						try {
+							const content = await this.vault.read(item);
+							fileMetadata.wordCount = ContentUtils.countWords(content);
+						} catch (error) {
+							// Skip word count if file can't be read (binary file, etc.)
+							// wordCount field simply omitted for this file
+						}
+					}
+
 					items.push(fileMetadata);
 				}
 			} else if (item instanceof TFolder) {
@@ -285,7 +301,7 @@ export class VaultTools {

 				// Recursively collect from subfolders if needed
 				if (recursive) {
-					await this.collectItems(item, items, recursive, includes, excludes, only, withFrontmatterSummary);
+					await this.collectItems(item, items, recursive, includes, excludes, only, withFrontmatterSummary, includeWordCount);
 				}
 			}
 		}
@@ -386,7 +402,7 @@ export class VaultTools {
 	}

 	// Phase 3: Discovery Endpoints
-	async stat(path: string): Promise<CallToolResult> {
+	async stat(path: string, includeWordCount: boolean = false): Promise<CallToolResult> {
 		// Validate path
 		if (!PathUtils.isValidVaultPath(path)) {
 			return {
@@ -417,11 +433,23 @@ export class VaultTools {

 		// Check if it's a file
 		if (item instanceof TFile) {
+			const metadata = this.createFileMetadata(item);
+
+			// Optionally include word count
+			if (includeWordCount) {
+				try {
+					const content = await this.vault.read(item);
+					metadata.wordCount = ContentUtils.countWords(content);
+				} catch (error) {
+					// Skip word count if file can't be read (binary file, etc.)
+				}
+			}
+
 			const result: StatResult = {
 				path: normalizedPath,
 				exists: true,
 				kind: "file",
-				metadata: this.createFileMetadata(item)
+				metadata
 			};
 			return {
 				content: [{