feat: add automatic word count and link validation to write operations

Add automatic word count and link validation to create_note, update_note, and update_sections operations to provide immediate feedback on note content quality and link integrity.

Features:
- Word counting excludes frontmatter and Obsidian comments, includes all other content (code blocks, inline code, headings, lists, etc.)
- Link validation checks wikilinks, heading links, and embeds
- Results categorized as: valid links, broken notes (note doesn't exist), and broken headings (note exists but heading missing)
- Detailed broken link info includes line number and context snippet
- Human-readable summary (e.g., "15 links: 12 valid, 2 broken notes, 1 broken heading")
- Opt-out capability via validateLinks parameter (default: true) for performance-critical operations

Implementation:
- New ContentUtils.countWords() for word counting logic
- Enhanced LinkUtils.validateLinks() for comprehensive link validation
- Updated create_note, update_note, update_sections to return wordCount and linkValidation fields
- Updated MCP tool descriptions to document new features and parameters
- update_note now returns structured JSON instead of simple success message

Response format changes:
- create_note: added wordCount and linkValidation fields
- update_note: changed to structured response with wordCount and linkValidation
- update_sections: added wordCount and linkValidation fields

Breaking changes:
- update_note response format changed from simple message to structured JSON
2025-10-30 09:40:57 -04:00
parent c574a237ce
commit f0808c0346
10 changed files with 679 additions and 21 deletions
@@ -0,0 +1,42 @@
+import { FrontmatterUtils } from './frontmatter-utils';
+
+/**
+ * Static helpers for analysing markdown note content
+ */
+export class ContentUtils {
+	/**
+	 * Count the whitespace-separated words in a note.
+	 *
+	 * Frontmatter and Obsidian comments (%% ... %%) are stripped before counting;
+	 * everything else is counted: headings, paragraphs, lists, code blocks, inline code.
+	 *
+	 * @param content The full markdown content to analyze
+	 * @returns Number of non-empty whitespace-delimited tokens that remain
+	 */
+	static countWords(content: string): number {
+		// Drop the frontmatter block, then any Obsidian comments
+		const { contentWithoutFrontmatter: body } = FrontmatterUtils.extractFrontmatter(content);
+		const visibleText = this.removeObsidianComments(body);
+
+		// Splitting on whitespace runs can leave empty pieces at the edges,
+		// so only pieces with actual characters are tallied
+		let total = 0;
+		for (const token of visibleText.split(/\s+/)) {
+			if (token.trim().length > 0) {
+				total += 1;
+			}
+		}
+		return total;
+	}
+
+	/**
+	 * Strip Obsidian comments (%% ... %%) from content.
+	 * A comment may sit on one line or span several lines.
+	 *
+	 * @param content Content to process
+	 * @returns Content with every %% ... %% span removed
+	 */
+	private static removeObsidianComments(content: string): string {
+		// Lazy quantifier: each comment ends at the nearest closing %%,
+		// so several comments in one document are removed independently
+		const commentPattern = /%%[\s\S]*?%%/g;
+		return content.replace(commentPattern, '');
+	}
+}
@@ -41,6 +41,46 @@ export interface UnresolvedLink {
 	suggestions: string[];
 }

+/**
+ * Broken link information (note doesn't exist).
+ * validateLinks produces one entry per link whose target note could not be resolved,
+ * whether or not the link also names a heading.
+ */
+export interface BrokenNoteLink {
+	/** Original link text (the parsed link's raw value) */
+	link: string;
+	/**
+	 * Line number where the link appears.
+	 * validateLinks subtracts 1 before indexing the content's lines, so this is presumably 1-based.
+	 */
+	line: number;
+	/** Context snippet around the link, extracted from the line the link is on */
+	context: string;
+}
+
+/**
+ * Broken heading link information (note exists but heading doesn't).
+ * validateLinks produces one entry per heading link whose note resolved but whose
+ * heading text matched none of the note's cached headings (trimmed, case-insensitive).
+ */
+export interface BrokenHeadingLink {
+	/** Original link text (the parsed link's raw value) */
+	link: string;
+	/**
+	 * Line number where the link appears.
+	 * validateLinks subtracts 1 before indexing the content's lines, so this is presumably 1-based.
+	 */
+	line: number;
+	/** Context snippet around the link, extracted from the line the link is on */
+	context: string;
+	/** The note path that exists (path of the file the link resolved to) */
+	note: string;
+}
+
+/**
+ * Link validation result.
+ * Every parsed link ends up in exactly one of the three arrays.
+ */
+export interface LinkValidationResult {
+	/** Array of valid links (raw link text of each link that resolved, including its heading if one was given) */
+	valid: string[];
+	/** Array of broken note links (note doesn't exist) */
+	brokenNotes: BrokenNoteLink[];
+	/** Array of broken heading links (note exists but heading doesn't) */
+	brokenHeadings: BrokenHeadingLink[];
+	/**
+	 * Human-readable summary, e.g. "15 links: 12 valid, 2 broken notes, 1 broken heading".
+	 * The broken-note and broken-heading parts are left out when their count is zero.
+	 */
+	summary: string;
+}
+
 /**
 * Backlink occurrence in a file
 */
@@ -394,4 +434,108 @@ export class LinkUtils {

 		return { resolvedLinks, unresolvedLinks };
 	}
+
+	/**
+	 * Check every wikilink in the content (plain links, heading links and embeds)
+	 * and sort each one into a bucket: valid, broken note, or broken heading.
+	 *
+	 * @param vault Vault adapter for file operations
+	 * @param metadata Metadata cache adapter for link resolution
+	 * @param content File content to validate
+	 * @param sourcePath Path of the file containing the links
+	 * @returns The three buckets plus a human-readable summary
+	 */
+	static async validateLinks(
+		vault: IVaultAdapter,
+		metadata: IMetadataCacheAdapter,
+		content: string,
+		sourcePath: string
+	): Promise<LinkValidationResult> {
+		const valid: string[] = [];
+		const brokenNotes: BrokenNoteLink[] = [];
+		const brokenHeadings: BrokenHeadingLink[] = [];
+
+		// Embeds (links starting with !) are part of the parsed wikilinks
+		const parsedLinks = this.parseWikilinks(content);
+		const contentLines = content.split('\n');
+
+		// Snippet for a reported link; the link's line number is shifted by one
+		// to index into contentLines
+		const snippetFor = (lineNumber: number) =>
+			this.extractSnippet(contentLines, lineNumber - 1, 100);
+
+		for (const entry of parsedLinks) {
+			const hashAt = entry.target.indexOf('#');
+			const isHeadingLink = hashAt !== -1;
+
+			// For a heading link only the text before the first # names the note;
+			// when that text is empty the source file itself is used as the target
+			const noteTarget = isHeadingLink
+				? entry.target.slice(0, hashAt) || sourcePath
+				: entry.target;
+			const file = this.resolveLink(vault, metadata, sourcePath, noteTarget);
+
+			if (!file) {
+				// Note doesn't exist (with or without a heading part)
+				const context = snippetFor(entry.line);
+				brokenNotes.push({ link: entry.raw, line: entry.line, context });
+				continue;
+			}
+
+			if (!isHeadingLink) {
+				// Plain link or embed that resolved
+				valid.push(entry.raw);
+				continue;
+			}
+
+			// Note exists: compare the requested heading with the cached headings,
+			// ignoring case and surrounding whitespace
+			const knownHeadings = metadata.getFileCache(file)?.headings || [];
+			// Everything after the first # is the heading (it may contain further #)
+			const wantedHeading = entry.target.slice(hashAt + 1).trim().toLowerCase();
+			const headingFound = knownHeadings.some(
+				h => h.heading.trim().toLowerCase() === wantedHeading
+			);
+
+			if (headingFound) {
+				valid.push(entry.raw);
+				continue;
+			}
+
+			// Note exists but heading doesn't
+			const context = snippetFor(entry.line);
+			brokenHeadings.push({
+				link: entry.raw,
+				line: entry.line,
+				context,
+				note: file.path
+			});
+		}
+
+		// Summary: total and valid counts always, broken counts only when non-zero
+		const total = valid.length + brokenNotes.length + brokenHeadings.length;
+		const summaryParts = [`${total} links: ${valid.length} valid`];
+		if (brokenNotes.length > 0) {
+			summaryParts.push(`${brokenNotes.length} broken note${brokenNotes.length === 1 ? '' : 's'}`);
+		}
+		if (brokenHeadings.length > 0) {
+			summaryParts.push(`${brokenHeadings.length} broken heading${brokenHeadings.length === 1 ? '' : 's'}`);
+		}
+
+		return {
+			valid,
+			brokenNotes,
+			brokenHeadings,
+			summary: summaryParts.join(', ')
+		};
+	}
 }