diff --git a/CLAUDE.md b/CLAUDE.md index 929113e..fee992c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,6 +6,7 @@ Auto-generated from all feature plans. Last updated: 2026-01-06 - SQLite in-memory database (001-schedule-tools) - Python 3.14 + mcp>=1.0.0 (MCP SDK), sqlite3 (stdlib) (001-schedule-tools) - In-memory SQLite database (populated from XER files at runtime) (001-schedule-tools) +- SQLite database file (persistent) in addition to existing in-memory option (002-direct-db-access) - Python 3.14 + mcp (MCP SDK), sqlite3 (stdlib) (001-schedule-tools) @@ -25,10 +26,10 @@ cd src [ONLY COMMANDS FOR ACTIVE TECHNOLOGIES][ONLY COMMANDS FOR ACTIVE TECHNOLO Python 3.14: Follow standard conventions ## Recent Changes +- 002-direct-db-access: Added Python 3.14 + mcp>=1.0.0 (MCP SDK), sqlite3 (stdlib) - 001-schedule-tools: Added Python 3.14 + mcp>=1.0.0 (MCP SDK), sqlite3 (stdlib) - 001-schedule-tools: Added Python 3.14 + mcp (MCP SDK), sqlite3 (stdlib) -- 001-schedule-tools: Added Python 3.14 + mcp (MCP SDK), sqlite3 (stdlib) diff --git a/pyproject.toml b/pyproject.toml index 91ccabb..1601c80 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "xer-mcp" -version = "0.1.0" +version = "0.2.0" description = "MCP server for querying Primavera P6 XER schedule data" readme = "README.md" requires-python = ">=3.14" diff --git a/specs/002-direct-db-access/checklists/requirements.md b/specs/002-direct-db-access/checklists/requirements.md new file mode 100644 index 0000000..a1939d9 --- /dev/null +++ b/specs/002-direct-db-access/checklists/requirements.md @@ -0,0 +1,37 @@ +# Specification Quality Checklist: Direct Database Access for Scripts + +**Purpose**: Validate specification completeness and quality before proceeding to planning +**Created**: 2026-01-08 +**Feature**: [spec.md](../spec.md) + +## Content Quality + +- [x] No implementation details (languages, frameworks, APIs) +- [x] Focused on user value and business needs +- [x] Written for 
non-technical stakeholders +- [x] All mandatory sections completed + +## Requirement Completeness + +- [x] No [NEEDS CLARIFICATION] markers remain +- [x] Requirements are testable and unambiguous +- [x] Success criteria are measurable +- [x] Success criteria are technology-agnostic (no implementation details) +- [x] All acceptance scenarios are defined +- [x] Edge cases are identified +- [x] Scope is clearly bounded +- [x] Dependencies and assumptions identified + +## Feature Readiness + +- [x] All functional requirements have clear acceptance criteria +- [x] User scenarios cover primary flows +- [x] Feature meets measurable outcomes defined in Success Criteria +- [x] No implementation details leak into specification + +## Notes + +- All items pass validation +- Spec is ready for `/speckit.clarify` or `/speckit.plan` +- The feature cleanly extends existing functionality (load_xer) with persistent database output +- Assumptions section documents reasonable defaults for database location and persistence behavior diff --git a/specs/002-direct-db-access/contracts/mcp-tools.json b/specs/002-direct-db-access/contracts/mcp-tools.json new file mode 100644 index 0000000..bf22052 --- /dev/null +++ b/specs/002-direct-db-access/contracts/mcp-tools.json @@ -0,0 +1,168 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "XER MCP Server Tools - Direct Database Access Extension", + "description": "MCP tool definitions for persistent database access", + "version": "0.2.0", + "tools": [ + { + "name": "load_xer", + "description": "Load a Primavera P6 XER file and parse its schedule data. 
Optionally persist to a SQLite database file for direct script access.", + "inputSchema": { + "type": "object", + "properties": { + "file_path": { + "type": "string", + "description": "Absolute path to the XER file" + }, + "project_id": { + "type": "string", + "description": "Project ID to select (required for multi-project files)" + }, + "db_path": { + "type": "string", + "description": "Path for persistent SQLite database file. If omitted, uses in-memory database. If empty string, auto-generates path from XER filename (same directory, .sqlite extension)." + } + }, + "required": ["file_path"] + }, + "outputSchema": { + "type": "object", + "properties": { + "success": { "type": "boolean" }, + "project": { + "type": "object", + "properties": { + "proj_id": { "type": "string" }, + "proj_short_name": { "type": "string" }, + "plan_start_date": { "type": "string", "format": "date-time" }, + "plan_end_date": { "type": "string", "format": "date-time" } + } + }, + "activity_count": { "type": "integer" }, + "relationship_count": { "type": "integer" }, + "database": { "$ref": "#/$defs/DatabaseInfo" }, + "available_projects": { + "type": "array", + "items": { + "type": "object", + "properties": { + "proj_id": { "type": "string" }, + "proj_short_name": { "type": "string" } + } + }, + "description": "Only present for multi-project files without selection" + }, + "warnings": { + "type": "array", + "items": { "type": "string" } + } + } + } + }, + { + "name": "get_database_info", + "description": "Get information about the currently loaded database including file path and schema. 
Use this to get connection details for direct SQL access.", + "inputSchema": { + "type": "object", + "properties": {} + }, + "outputSchema": { + "type": "object", + "properties": { + "database": { "$ref": "#/$defs/DatabaseInfo" }, + "error": { "$ref": "#/$defs/Error" } + } + } + } + ], + "$defs": { + "DatabaseInfo": { + "type": "object", + "properties": { + "db_path": { + "type": "string", + "description": "Absolute path to SQLite database file, or ':memory:' for in-memory" + }, + "is_persistent": { + "type": "boolean", + "description": "True if file-based database, false if in-memory" + }, + "source_file": { + "type": "string", + "description": "Path to XER file that was loaded" + }, + "loaded_at": { + "type": "string", + "format": "date-time", + "description": "When data was loaded" + }, + "schema": { "$ref": "#/$defs/SchemaInfo" } + }, + "required": ["db_path", "is_persistent", "loaded_at", "schema"] + }, + "SchemaInfo": { + "type": "object", + "properties": { + "version": { + "type": "string", + "description": "Schema version" + }, + "tables": { + "type": "array", + "items": { "$ref": "#/$defs/TableInfo" } + } + }, + "required": ["version", "tables"] + }, + "TableInfo": { + "type": "object", + "properties": { + "name": { "type": "string" }, + "columns": { + "type": "array", + "items": { "$ref": "#/$defs/ColumnInfo" } + }, + "primary_key": { + "type": "array", + "items": { "type": "string" } + }, + "foreign_keys": { + "type": "array", + "items": { "$ref": "#/$defs/ForeignKeyInfo" } + }, + "row_count": { "type": "integer" } + }, + "required": ["name", "columns", "primary_key", "row_count"] + }, + "ColumnInfo": { + "type": "object", + "properties": { + "name": { "type": "string" }, + "type": { "type": "string" }, + "nullable": { "type": "boolean" }, + "default": { "type": "string" } + }, + "required": ["name", "type", "nullable"] + }, + "ForeignKeyInfo": { + "type": "object", + "properties": { + "column": { "type": "string" }, + "references_table": { "type": 
"string" }, + "references_column": { "type": "string" } + }, + "required": ["column", "references_table", "references_column"] + }, + "Error": { + "type": "object", + "properties": { + "code": { + "type": "string", + "enum": ["NO_FILE_LOADED", "DATABASE_ERROR", "FILE_NOT_WRITABLE", "DISK_FULL"] + }, + "message": { "type": "string" } + }, + "required": ["code", "message"] + } + } +} diff --git a/specs/002-direct-db-access/data-model.md b/specs/002-direct-db-access/data-model.md new file mode 100644 index 0000000..64263a7 --- /dev/null +++ b/specs/002-direct-db-access/data-model.md @@ -0,0 +1,181 @@ +# Data Model: Direct Database Access for Scripts + +**Date**: 2026-01-08 +**Branch**: `002-direct-db-access` + +## Entity Overview + +``` +┌─────────────────┐ +│ DatabaseInfo │ +└─────────────────┘ + │ + ▼ +┌─────────────────┐ ┌─────────────────┐ +│ SchemaInfo │───────│ TableInfo │ +└─────────────────┘ └─────────────────┘ + │ + ▼ + ┌─────────────────┐ + │ ColumnInfo │ + └─────────────────┘ +``` + +## New Entities + +### DatabaseInfo + +Information about the current database connection, returned by load operations and queryable separately. + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| db_path | string | Yes | Absolute path to SQLite database file; `:memory:` for in-memory | +| is_persistent | boolean | Yes | True if file-based, false if in-memory | +| source_file | string | No | Path to XER file that was loaded | +| loaded_at | datetime | Yes | When data was loaded | +| schema | SchemaInfo | Yes | Database schema information | + +**Validation Rules**: +- db_path must be an absolute path (or `:memory:`) +- loaded_at must be ISO 8601 format + +### SchemaInfo + +Metadata describing the database structure. 
+ +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| tables | list[TableInfo] | Yes | All tables in the database | +| version | string | Yes | Schema version (matches server version) | + +### TableInfo + +Information about a single database table. + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| name | string | Yes | Table name | +| columns | list[ColumnInfo] | Yes | Column definitions | +| primary_key | list[string] | Yes | Column(s) forming primary key | +| foreign_keys | list[ForeignKeyInfo] | No | Foreign key relationships | +| row_count | integer | Yes | Number of rows in table | + +### ColumnInfo + +Information about a single column. + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| name | string | Yes | Column name | +| type | string | Yes | SQLite data type (TEXT, INTEGER, REAL, etc.) | +| nullable | boolean | Yes | Whether NULL values are allowed | +| default | string | No | Default value if any | + +### ForeignKeyInfo + +Foreign key relationship information. + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| column | string | Yes | Column in this table | +| references_table | string | Yes | Referenced table | +| references_column | string | Yes | Referenced column | + +## Extended load_xer Response + +The existing load_xer tool response is extended with database information: + +```json +{ + "success": true, + "project": { ... }, + "activity_count": 4440, + "relationship_count": 8583, + "database": { + "db_path": "/path/to/schedule.sqlite", + "is_persistent": true, + "source_file": "/path/to/schedule.xer", + "loaded_at": "2026-01-08T14:30:00", + "schema": { + "version": "0.2.0", + "tables": [ + { + "name": "activities", + "columns": [ + {"name": "task_id", "type": "TEXT", "nullable": false}, + {"name": "task_name", "type": "TEXT", "nullable": false}, + ... 
+ ], + "primary_key": ["task_id"], + "foreign_keys": [ + {"column": "proj_id", "references_table": "projects", "references_column": "proj_id"} + ], + "row_count": 4440 + }, + ... + ] + } + } +} +``` + +## New Tool Input Schema + +### load_xer (extended) + +New optional parameter: + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| db_path | string | No | Path for persistent database file. If omitted, uses in-memory database. If empty string, auto-generates path from XER filename. | + +### get_database_info + +No input parameters required. Returns current database information. + +## SQLite Schema (Unchanged) + +The underlying database schema remains unchanged from feature 001-schedule-tools: + +- `projects` - Project metadata +- `activities` - Task/activity data +- `relationships` - Predecessor/successor relationships +- `wbs` - Work breakdown structure +- `calendars` - Calendar definitions + +The difference is storage location (file vs memory), not structure. 
+ +## Query Examples for Scripts + +Once scripts have the database path, they can execute standard SQL: + +```python +import sqlite3 + +# Connect using path from load_xer response +conn = sqlite3.connect("/path/to/schedule.sqlite") + +# Query activities +cursor = conn.execute(""" + SELECT task_code, task_name, target_start_date, target_end_date + FROM activities + WHERE task_type = 'TT_Mile' + ORDER BY target_start_date +""") + +for row in cursor: + print(row) +``` + +```sql +-- Find critical path activities +SELECT task_code, task_name, total_float_hr_cnt +FROM activities +WHERE driving_path_flag = 1 +ORDER BY target_start_date; + +-- Join activities with WBS +SELECT a.task_code, a.task_name, w.wbs_name +FROM activities a +JOIN wbs w ON a.wbs_id = w.wbs_id; +``` diff --git a/specs/002-direct-db-access/plan.md b/specs/002-direct-db-access/plan.md new file mode 100644 index 0000000..c0fd672 --- /dev/null +++ b/specs/002-direct-db-access/plan.md @@ -0,0 +1,108 @@ +# Implementation Plan: Direct Database Access for Scripts + +**Branch**: `002-direct-db-access` | **Date**: 2026-01-08 | **Spec**: [spec.md](./spec.md) +**Input**: Feature specification from `/specs/002-direct-db-access/spec.md` + +## Summary + +Extend the XER MCP Server to support persistent SQLite database files instead of only in-memory databases. The existing `load_xer` tool will gain an optional parameter to write to a file-based database, and the response will include the database path and schema information. A new `get_database_info` tool will allow retrieval of database connection details without reloading. This enables scripts to query schedule data directly via SQL, reducing LLM workload for large data processing. 
+ +## Technical Context + +**Language/Version**: Python 3.14 +**Primary Dependencies**: mcp>=1.0.0 (MCP SDK), sqlite3 (stdlib) +**Storage**: SQLite database file (persistent) in addition to existing in-memory option +**Testing**: pytest>=8.0.0, pytest-asyncio>=0.24.0 +**Target Platform**: Local server (Linux/macOS/Windows with file system access) +**Project Type**: Single project (extending existing structure) +**Performance Goals**: Database file creation <1 second; query response <100ms for 10,000+ activities +**Constraints**: File must be accessible by external scripts; atomic writes to prevent corruption +**Scale/Scope**: 2 MCP tools modified/added; extends existing database layer + +## Constitution Check + +*GATE: Must pass before Phase 0 research. Re-check after Phase 1 design.* + +| Principle | Requirement | Status | Notes | +|-----------|-------------|--------|-------| +| **I. Test-First Development** | TDD mandatory; tests fail before implementation | ✅ PASS | Contract tests for modified load_xer and new get_database_info tool | +| **II. Extensibility Architecture** | Core parsing separate from MCP transport; pluggable handlers | ✅ PASS | Extends existing DatabaseManager; no changes to parser or handlers | +| **III. MCP Protocol Compliance** | Complete JSON schemas; MCP error format; compliant transport | ✅ PASS | New tool definitions include JSON schemas; error responses follow MCP format | +| **IV. XER Format Fidelity** | No data loss; preserve precision; handle all standard tables | ✅ PASS | Same data written to file-based DB as in-memory; no parsing changes | +| **V. Semantic Versioning** | SemVer for releases; breaking changes documented | ✅ PASS | Minor version bump (0.2.0); backward compatible - existing behavior unchanged | +| **Technical Standards** | Python 3.14; type hints; ruff formatting | ✅ PASS | Follows existing codebase patterns | + +**Gate Result**: PASS - All constitution principles satisfied. Proceed to Phase 0. 
+ +## Project Structure + +### Documentation (this feature) + +```text +specs/002-direct-db-access/ +├── spec.md # Feature specification +├── plan.md # This file (/speckit.plan command output) +├── research.md # Phase 0 output (/speckit.plan command) +├── data-model.md # Phase 1 output (/speckit.plan command) +├── quickstart.md # Phase 1 output (/speckit.plan command) +├── contracts/ # Phase 1 output (/speckit.plan command) +└── tasks.md # Phase 2 output (/speckit.tasks command - NOT created by /speckit.plan) +``` + +### Source Code (repository root) + +```text +src/xer_mcp/ +├── db/ +│ ├── __init__.py # Modify DatabaseManager for file-based DB support +│ ├── schema.py # No changes needed +│ ├── loader.py # No changes needed +│ └── queries.py # Add schema introspection query +└── tools/ + ├── load_xer.py # Modify to support db_path parameter and return schema + └── get_database_info.py # NEW: Return database path and schema info + +tests/ +├── contract/ +│ ├── test_load_xer.py # Add tests for persistent database functionality +│ └── test_get_database_info.py # NEW: Contract tests for schema retrieval +└── unit/ + └── test_db_manager.py # NEW: Unit tests for file-based database operations +``` + +**Structure Decision**: Extends existing single project structure. Minimal changes to existing modules; adds one new tool file and modifies DatabaseManager to support both in-memory and file-based databases. + +## Complexity Tracking + +> **Fill ONLY if Constitution Check has violations that must be justified** + +No violations. All constitution principles satisfied. + +## Post-Design Constitution Re-Check + +*Re-evaluation after Phase 1 design artifacts are complete.* + +| Principle | Status | Verification | +|-----------|--------|--------------| +| **I. Test-First Development** | ✅ PASS | Contract tests for load_xer db_path parameter; contract tests for get_database_info tool | +| **II. 
Extensibility Architecture** | ✅ PASS | DatabaseManager extended without breaking existing interface; new tool follows existing patterns | +| **III. MCP Protocol Compliance** | ✅ PASS | Tool schemas defined in contracts/mcp-tools.json; error responses use existing MCP error format | +| **IV. XER Format Fidelity** | ✅ PASS | No changes to parsing; same data fidelity in file-based DB as in-memory | +| **V. Semantic Versioning** | ✅ PASS | Version 0.2.0; new feature, backward compatible | +| **Technical Standards** | ✅ PASS | Type hints; ruff formatting; pytest async tests | + +**Post-Design Gate Result**: PASS - Design artifacts align with constitution. Ready for task generation. + +## Generated Artifacts + +| Artifact | Path | Description | +|----------|------|-------------| +| Research | `specs/002-direct-db-access/research.md` | SQLite file vs in-memory, atomic writes, schema introspection | +| Data Model | `specs/002-direct-db-access/data-model.md` | DatabaseInfo entity, SchemaInfo structure | +| Contracts | `specs/002-direct-db-access/contracts/mcp-tools.json` | Updated load_xer schema, new get_database_info schema | +| Quickstart | `specs/002-direct-db-access/quickstart.md` | Usage examples for direct database access | +| Agent Context | `CLAUDE.md` | Updated: added persistent SQLite storage entry and 002-direct-db-access change note (same technologies) | + +## Next Steps + +Run `/speckit.tasks` to generate implementation tasks from this plan. diff --git a/specs/002-direct-db-access/quickstart.md b/specs/002-direct-db-access/quickstart.md new file mode 100644 index 0000000..520f44d --- /dev/null +++ b/specs/002-direct-db-access/quickstart.md @@ -0,0 +1,189 @@ +# Quickstart: Direct Database Access for Scripts + +This guide shows how to use the XER MCP Server's persistent database feature to query schedule data directly from scripts. + +## Overview + +The XER MCP Server can save parsed schedule data to a SQLite file, allowing your scripts to query the data directly without going through MCP tools.
This is ideal for: + +- Large-scale data analysis +- Custom reporting +- Integration with other tools +- Reducing LLM token usage for data-heavy operations + +## Loading to a Persistent Database + +### Option 1: Auto-generated Path + +Use an empty string for `db_path` to automatically create a database file alongside the XER file: + +``` +Use the load_xer tool with file_path="/path/to/schedule.xer" and db_path="" +``` + +**Response**: +```json +{ + "success": true, + "project": { + "proj_id": "P001", + "proj_short_name": "Construction Phase 1" + }, + "activity_count": 4440, + "database": { + "db_path": "/path/to/schedule.sqlite", + "is_persistent": true, + "source_file": "/path/to/schedule.xer", + "loaded_at": "2026-01-08T14:30:00", + "schema": { + "version": "0.2.0", + "tables": [ + { + "name": "activities", + "columns": [ + {"name": "task_id", "type": "TEXT", "nullable": false}, + {"name": "task_code", "type": "TEXT", "nullable": false}, + {"name": "task_name", "type": "TEXT", "nullable": false}, + ... + ], + "row_count": 4440 + }, + ... + ] + } + } +} +``` + +### Option 2: Specify Custom Path + +``` +Use the load_xer tool with file_path="/path/to/schedule.xer" and db_path="/data/my-schedule.db" +``` + +### Option 3: In-Memory Only (Default) + +Omit `db_path` to use the original in-memory behavior: + +``` +Use the load_xer tool with file_path="/path/to/schedule.xer" +``` + +## Querying the Database Directly + +Once you have the database path, use any SQLite client to query the data. 
+ +### Python Example + +```python +import sqlite3 + +# Use the db_path from load_xer response +db_path = "/path/to/schedule.sqlite" + +conn = sqlite3.connect(db_path) +conn.row_factory = sqlite3.Row + +# Query all milestones +cursor = conn.execute(""" + SELECT task_code, task_name, target_start_date, milestone_type + FROM activities + WHERE task_type IN ('TT_Mile', 'TT_FinMile') + ORDER BY target_start_date +""") + +for row in cursor: + print(f"{row['task_code']}: {row['task_name']} ({row['milestone_type']})") + +conn.close() +``` + +### SQL Examples + +**Find critical path activities:** +```sql +SELECT task_code, task_name, target_start_date, target_end_date +FROM activities +WHERE driving_path_flag = 1 +ORDER BY target_start_date; +``` + +**Get activity count by WBS:** +```sql +SELECT w.wbs_name, COUNT(*) as activity_count +FROM activities a +JOIN wbs w ON a.wbs_id = w.wbs_id +GROUP BY w.wbs_id +ORDER BY activity_count DESC; +``` + +**Find activities with predecessors:** +```sql +SELECT a.task_code, a.task_name, + COUNT(r.pred_task_id) as predecessor_count +FROM activities a +LEFT JOIN relationships r ON a.task_id = r.task_id +GROUP BY a.task_id +HAVING predecessor_count > 0 +ORDER BY predecessor_count DESC; +``` + +**Export milestones to CSV (using sqlite3 CLI):** +```bash +sqlite3 -header -csv schedule.sqlite \ + "SELECT task_code, task_name, target_start_date, milestone_type + FROM activities + WHERE task_type IN ('TT_Mile', 'TT_FinMile')" > milestones.csv +``` + +## Getting Database Info Without Reloading + +If you need the database path and schema later: + +``` +Use the get_database_info tool +``` + +**Response**: +```json +{ + "database": { + "db_path": "/path/to/schedule.sqlite", + "is_persistent": true, + "source_file": "/path/to/schedule.xer", + "loaded_at": "2026-01-08T14:30:00", + "schema": { ... 
} + } +} +``` + +## Database Schema Reference + +| Table | Description | Key Columns | +|-------|-------------|-------------| +| `projects` | Project metadata | proj_id, proj_short_name, plan_start_date, plan_end_date | +| `activities` | Tasks and milestones | task_id, task_code, task_name, task_type, target_start_date, target_end_date | +| `relationships` | Predecessor/successor links | task_pred_id, task_id, pred_task_id, pred_type, lag_hr_cnt | +| `wbs` | Work breakdown structure | wbs_id, wbs_name, parent_wbs_id | +| `calendars` | Calendar definitions | clndr_id, clndr_name, day_hr_cnt | + +## Best Practices + +1. **Use persistent database for large schedules**: Schedules with 1000+ activities benefit from direct SQL queries + +2. **Close connections when done**: Always close your database connections to avoid lock issues + +3. **Read-only access recommended**: The database is designed for reading; modifications may cause inconsistencies with MCP tools + +4. **Re-load when XER changes**: If the source XER file is updated, reload to refresh the database + +5. **Check schema version**: The schema version in the response indicates compatibility with your queries + +## Error Handling + +| Error Code | Meaning | Solution | +|------------|---------|----------| +| NO_FILE_LOADED | No XER file has been loaded | Call load_xer first | +| FILE_NOT_WRITABLE | Cannot write database file | Check directory permissions | +| DISK_FULL | Insufficient disk space | Free up disk space | +| DATABASE_ERROR | General database error | Check error message for details | diff --git a/specs/002-direct-db-access/research.md b/specs/002-direct-db-access/research.md new file mode 100644 index 0000000..e446a9b --- /dev/null +++ b/specs/002-direct-db-access/research.md @@ -0,0 +1,110 @@ +# Research: Direct Database Access for Scripts + +**Date**: 2026-01-08 +**Branch**: `002-direct-db-access` + +## Research Topics + +### 1. 
SQLite File-Based vs In-Memory Database + +**Decision**: Support both file-based and in-memory databases through the same DatabaseManager interface. + +**Rationale**: +- File-based SQLite allows external scripts to query data directly without MCP overhead +- In-memory remains the default for backward compatibility with existing tools +- SQLite's file format is universally readable (Python sqlite3, DBeaver, sqlitebrowser, etc.) +- Single connection string change (`:memory:` vs file path) switches modes + +**Alternatives Considered**: +- **Separate database manager for files**: Rejected - unnecessary duplication; SQLite handles both modes identically +- **Export to CSV/JSON**: Rejected - loses relational structure; no query capability +- **Network database (PostgreSQL)**: Rejected - overkill for single-user local access; requires external server + +### 2. Database File Location Strategy + +**Decision**: Accept user-specified path, or default to a predictable location derived from the XER file path. + +**Rationale**: +- User-specified path gives maximum flexibility for script integration +- Default path (`{xer_file_directory}/{xer_basename}.sqlite`) is predictable and colocated with source +- Absolute paths in responses eliminate ambiguity for scripts + +**Default Path Algorithm**: +``` +Input: /path/to/schedule.xer +Output: /path/to/schedule.sqlite +``` + +**Alternatives Considered**: +- **Temp directory only**: Rejected - less predictable; may be cleaned up unexpectedly +- **Fixed location (e.g., ~/.xer-mcp/db/)**: Rejected - less convenient; separates DB from source file +- **Always require user to specify**: Rejected - worse developer experience for common case + +### 3. Atomic Write Strategy + +**Decision**: Use SQLite's built-in transaction support; write to temp file and rename for atomicity. 
+ +**Rationale**: +- SQLite transactions ensure data integrity during writes +- Write-then-rename pattern prevents partial/corrupted files if process interrupted +- External scripts won't see incomplete data + +**Implementation**: +1. Create database at `{target_path}.tmp` +2. Load all data within a transaction +3. Commit transaction +4. Rename `{target_path}.tmp` to `{target_path}` using an overwrite-capable rename such as `os.replace` (atomic on POSIX; a plain rename fails on Windows when the target already exists, which happens on every reload) + +**Alternatives Considered**: +- **Direct write to final path**: Rejected - risk of corruption on interruption +- **Lock file mechanism**: Rejected - SQLite already handles locking; adds complexity + +### 4. Schema Introspection Approach + +**Decision**: Query SQLite's `sqlite_master` table and `PRAGMA table_info()` for schema information.
+ +**Rationale**: +- WAL mode allows multiple readers while one writer operates +- Scripts can query while MCP server operates without blocking +- Minimal configuration change: `PRAGMA journal_mode=WAL` + +**Implementation**: +- Enable WAL mode when creating file-based database; close the loading connection before the temp-file rename so the `-wal`/`-shm` sidecar files are checkpointed and removed rather than orphaned at the `.tmp` path +- In-memory databases don't need WAL (single connection) + +**Alternatives Considered**: +- **Reader/writer locks in application**: Rejected - SQLite handles this natively +- **Copy-on-read**: Rejected - unnecessary with WAL mode + +## Technology Decisions Summary + +| Decision | Choice | Key Reason | +|----------|--------|------------| +| Database format | SQLite file | Universal compatibility, no server needed | +| Default location | Same directory as XER file | Predictable, colocated | +| Atomicity | Temp file + rename | Prevents corruption | +| Schema info | SQLite introspection APIs | Dynamic, accurate | +| Concurrency | WAL mode | Multiple readers supported | + +## Open Questions Resolved + +All technical questions have been resolved through research. No clarifications needed. diff --git a/specs/002-direct-db-access/spec.md b/specs/002-direct-db-access/spec.md new file mode 100644 index 0000000..335f04a --- /dev/null +++ b/specs/002-direct-db-access/spec.md @@ -0,0 +1,102 @@ +# Feature Specification: Direct Database Access for Scripts + +**Feature Branch**: `002-direct-db-access` +**Created**: 2026-01-08 +**Status**: Draft +**Input**: User description: "Create a new feature that allows for scripts to directly query the schedule loaded into the database. The mcp endpoint should be used to load the xer file to a database. The response should provide the necessary information for a script to then access that database directly to perform queries. The intent is to minimize the costly and time-consuming workload on the LLM for large data processing."
+ +## User Scenarios & Testing *(mandatory)* + +### User Story 1 - Load XER to Persistent Database (Priority: P1) + +As a developer building schedule analysis scripts, I want to load an XER file into a database that persists beyond the MCP session so that my scripts can query the data directly without going through the LLM. + +**Why this priority**: This is the foundation of the feature - without a persistent database, scripts cannot access the data directly. This enables the primary use case of offloading large data processing from the LLM. + +**Independent Test**: Can be tested by calling the load endpoint and verifying the database file is created at the returned path with the expected schema and data. + +**Acceptance Scenarios**: + +1. **Given** a valid XER file path, **When** I call the load-to-database endpoint, **Then** the system creates a SQLite database file at a predictable location and returns the database file path +2. **Given** an XER file is loaded to database, **When** I examine the database file, **Then** it contains all activities, relationships, WBS elements, and project data from the XER file +3. **Given** a database was previously created for an XER file, **When** I load the same XER file again, **Then** the existing database is replaced with fresh data +4. **Given** an invalid or non-existent XER file path, **When** I call the load-to-database endpoint, **Then** I receive a clear error message and no database file is created + +--- + +### User Story 2 - Retrieve Database Connection Information (Priority: P1) + +As a developer, I want the load response to include all information needed to connect to and query the database so that I can immediately start writing queries in my scripts. + +**Why this priority**: Without connection information, developers cannot use the database even if it exists. This is equally critical to the first story. 
+ +**Independent Test**: Can be tested by using the returned connection info to successfully open and query the database from an external script. + +**Acceptance Scenarios**: + +1. **Given** a successful XER load to database, **When** I receive the response, **Then** it includes the absolute path to the database file +2. **Given** a successful XER load to database, **When** I receive the response, **Then** it includes the database schema description (table names and key columns) +3. **Given** the returned database path, **When** I connect to it from a Python/SQL script, **Then** I can successfully query activities, relationships, and other schedule data + +--- + +### User Story 3 - Query Database Schema Information (Priority: P2) + +As a developer unfamiliar with the database structure, I want to retrieve the database schema so that I can write correct SQL queries without guessing table and column names. + +**Why this priority**: While developers can explore the database manually, having schema information readily available improves developer experience and reduces errors. + +**Independent Test**: Can be tested by calling the schema endpoint and verifying the returned schema matches the actual database structure. + +**Acceptance Scenarios**: + +1. **Given** a database has been created, **When** I request the schema, **Then** I receive a list of all tables with their columns and data types +2. **Given** a database has been created, **When** I request the schema, **Then** I receive information about relationships between tables (foreign keys) +3. **Given** no database has been created yet, **When** I request the schema, **Then** I receive an informative error indicating no database is available + +--- + +### Edge Cases + +- What happens when the disk is full and database cannot be created? Return a clear error message indicating storage issue. +- What happens when the database file path is not writable? Return a clear error message indicating permission issue. 
+- What happens when a script is querying the database while a new XER file is being loaded? The load operation should complete atomically - either fully succeed or fully fail, preventing partial/corrupted reads. +- What happens when multiple XER files are loaded in sequence? Each load replaces the previous database content; only one project's data is available at a time. + +## Requirements *(mandatory)* + +### Functional Requirements + +- **FR-001**: System MUST provide an MCP tool to load an XER file into a persistent SQLite database file (not just in-memory) +- **FR-002**: System MUST return the absolute file path to the created database in the load response +- **FR-003**: System MUST return a summary of the database schema (tables and key columns) in the load response +- **FR-004**: Database file MUST be stored in a predictable, accessible location that scripts can reach +- **FR-005**: System MUST preserve all data currently stored by the in-memory database (activities, relationships, WBS, calendars, projects) +- **FR-006**: System MUST provide an MCP tool to retrieve the current database path and schema without reloading data +- **FR-007**: System MUST handle concurrent access safely - database remains queryable while MCP tools are used +- **FR-008**: System MUST return clear errors when database operations fail (file not writable, disk full, etc.) +- **FR-009**: Database MUST use standard SQLite format readable by any SQLite client (Python sqlite3, DBeaver, etc.) 
+ +### Key Entities + +- **Database File**: A persistent SQLite database file containing parsed XER data; has a file path, creation timestamp, and source XER file reference +- **Schema Information**: Metadata describing database structure; includes table names, column names, data types, and foreign key relationships +- **Connection Info**: All information needed to connect to and query the database; includes file path, schema summary, and access instructions + +## Success Criteria *(mandatory)* + +### Measurable Outcomes + +- **SC-001**: Scripts can query loaded schedule data directly via SQL without MCP tool calls after initial load +- **SC-002**: Database file is accessible and queryable by standard SQLite clients within 1 second of load completion +- **SC-003**: Large schedules (10,000+ activities) can be queried directly by scripts in under 100ms per query +- **SC-004**: Developers can write working SQL queries using only the schema information returned by the system +- **SC-005**: 100% of data available through existing MCP tools is also available in the direct database + +## Assumptions + +- SQLite is an appropriate database format for this use case (widely supported, file-based, no server needed) +- Scripts will primarily use Python, but any language with SQLite support should work +- The database file will be stored in a project-relative or user-accessible directory +- Single-user operation - concurrent writes from multiple sources are not required +- Database persistence is session-based; the file may be cleaned up when the MCP server stops (or may persist based on configuration) diff --git a/specs/002-direct-db-access/tasks.md b/specs/002-direct-db-access/tasks.md new file mode 100644 index 0000000..48fa72c --- /dev/null +++ b/specs/002-direct-db-access/tasks.md @@ -0,0 +1,737 @@ +# Implementation Tasks: Direct Database Access for Scripts + +**Branch**: `002-direct-db-access` | **Spec**: [spec.md](./spec.md) | **Plan**: [plan.md](./plan.md) + +## Quick 
Reference + +- **Feature**: Direct Database Access for Scripts +- **Version**: 0.2.0 +- **Test Command**: `pytest` +- **Lint Command**: `ruff check .` + +--- + +## Phase 1: Setup + +### Task 1.1: Create Feature Branch and Documentation Structure [X] + +**Type**: Setup +**Why**: Establish isolated workspace for feature development + +**Steps**: +1. Verify branch `002-direct-db-access` exists (already created during spec/plan) +2. Verify all design artifacts exist: + - `specs/002-direct-db-access/spec.md` + - `specs/002-direct-db-access/plan.md` + - `specs/002-direct-db-access/research.md` + - `specs/002-direct-db-access/data-model.md` + - `specs/002-direct-db-access/contracts/mcp-tools.json` + - `specs/002-direct-db-access/quickstart.md` + +**Verification**: `git branch --show-current` shows `002-direct-db-access` + +**Acceptance**: All design artifacts present and branch is active + +--- + +## Phase 2: Foundational - DatabaseManager Extension + +### Task 2.1: Add File-Based Database Support to DatabaseManager [X] + +**Type**: Implementation +**Why**: Foundation for all persistent database features +**Dependencies**: None + +**Contract Reference**: `contracts/mcp-tools.json` - DatabaseInfo schema + +**Test First** (TDD): +```python +# tests/unit/test_db_manager.py + +def test_initialize_with_memory_by_default(): + """Default initialization uses in-memory database.""" + dm = DatabaseManager() + dm.initialize() + assert dm.db_path == ":memory:" + assert dm.is_persistent is False + +def test_initialize_with_file_path(): + """Can initialize with explicit file path.""" + dm = DatabaseManager() + dm.initialize(db_path="/tmp/test.db") + assert dm.db_path == "/tmp/test.db" + assert dm.is_persistent is True + # Cleanup + os.unlink("/tmp/test.db") + +def test_initialize_with_empty_string_auto_generates_path(): + """Empty string db_path with source_file auto-generates path.""" + dm = DatabaseManager() + dm.initialize(db_path="", source_file="/path/to/schedule.xer") + assert 
dm.db_path == "/path/to/schedule.sqlite" + assert dm.is_persistent is True + +def test_file_database_persists_after_close(): + """File-based database persists after connection close.""" + dm = DatabaseManager() + dm.initialize(db_path="/tmp/persist_test.db") + # Insert test data would go here + dm.close() + assert os.path.exists("/tmp/persist_test.db") + # Cleanup + os.unlink("/tmp/persist_test.db") +``` + +**Implementation**: +- Modify `src/xer_mcp/db/__init__.py`: + - Add `db_path` property to track current database path + - Add `is_persistent` property (True if file-based, False if in-memory) + - Add `source_file` property to track loaded XER file + - Add `loaded_at` property (datetime when data was loaded) + - Modify `initialize()` to accept optional `db_path` and `source_file` parameters + - If `db_path` is empty string and `source_file` provided, derive path from source file + - Use WAL mode for file-based databases: `PRAGMA journal_mode=WAL` + +**Files Changed**: +- `src/xer_mcp/db/__init__.py` +- `tests/unit/test_db_manager.py` (new) + +**Verification**: `pytest tests/unit/test_db_manager.py -v` + +**Acceptance**: All unit tests pass; DatabaseManager supports both in-memory and file-based modes + +--- + +### Task 2.2: Implement Atomic Write Pattern [X] + +**Type**: Implementation +**Why**: Prevents corrupted database files if process interrupted during load +**Dependencies**: Task 2.1 + +**Contract Reference**: `research.md` - Atomic Write Strategy + +**Test First** (TDD): +```python +# tests/unit/test_db_manager.py + +def test_atomic_write_creates_temp_file_first(): + """Database is created at .tmp path first, then renamed.""" + dm = DatabaseManager() + target = "/tmp/atomic_test.db" + # During initialization, temp file should exist + # After completion, only target should exist + dm.initialize(db_path=target) + assert os.path.exists(target) + assert not os.path.exists(target + ".tmp") + os.unlink(target) + +def test_atomic_write_removes_temp_on_failure(): 
+ """Temp file is cleaned up if initialization fails.""" + # Test with invalid schema or similar failure scenario + pass +``` + +**Implementation**: +- In `initialize()` method when `db_path` is not `:memory:`: + 1. Create connection to `{db_path}.tmp` + 2. Execute schema creation + 3. Close connection + 4. Rename `{db_path}.tmp` to `{db_path}` (atomic on POSIX) + 5. Reopen connection to final path +- Handle cleanup of `.tmp` file on failure + +**Files Changed**: +- `src/xer_mcp/db/__init__.py` +- `tests/unit/test_db_manager.py` + +**Verification**: `pytest tests/unit/test_db_manager.py -v` + +**Acceptance**: Atomic write tests pass; no partial database files created on failure + +--- + +### Task 2.3: Add Schema Introspection Query [X] + +**Type**: Implementation +**Why**: Required for returning schema information in responses +**Dependencies**: Task 2.1 + +**Contract Reference**: `contracts/mcp-tools.json` - SchemaInfo, TableInfo, ColumnInfo schemas + +**Test First** (TDD): +```python +# tests/unit/test_db_manager.py + +def test_get_schema_info_returns_all_tables(): + """Schema info includes all database tables.""" + dm = DatabaseManager() + dm.initialize() + schema = dm.get_schema_info() + assert schema["version"] == "0.2.0" + table_names = [t["name"] for t in schema["tables"]] + assert "projects" in table_names + assert "activities" in table_names + assert "relationships" in table_names + assert "wbs" in table_names + assert "calendars" in table_names + +def test_get_schema_info_includes_column_details(): + """Schema info includes column names, types, and nullable.""" + dm = DatabaseManager() + dm.initialize() + schema = dm.get_schema_info() + activities_table = next(t for t in schema["tables"] if t["name"] == "activities") + column_names = [c["name"] for c in activities_table["columns"]] + assert "task_id" in column_names + assert "task_name" in column_names + # Check column details + task_id_col = next(c for c in activities_table["columns"] if c["name"] == 
"task_id") + assert task_id_col["type"] == "TEXT" + assert task_id_col["nullable"] is False + +def test_get_schema_info_includes_row_counts(): + """Schema info includes row counts for each table.""" + dm = DatabaseManager() + dm.initialize() + schema = dm.get_schema_info() + for table in schema["tables"]: + assert "row_count" in table + assert isinstance(table["row_count"], int) +``` + +**Implementation**: +- Add `get_schema_info()` method to DatabaseManager: + - Query `sqlite_master` for table names + - Use `PRAGMA table_info(table_name)` for column details + - Use `PRAGMA foreign_key_list(table_name)` for foreign keys + - Query `SELECT COUNT(*) FROM table` for row counts + - Return SchemaInfo structure matching contract + +**Files Changed**: +- `src/xer_mcp/db/__init__.py` +- `tests/unit/test_db_manager.py` + +**Verification**: `pytest tests/unit/test_db_manager.py -v` + +**Acceptance**: Schema introspection returns accurate table/column information + +--- + +## Phase 3: User Story 1 - Load XER to Persistent Database (P1) + +### Task 3.1: Extend load_xer Tool with db_path Parameter [X] + +**Type**: Implementation +**Why**: Core feature - enables persistent database creation +**Dependencies**: Task 2.1, Task 2.2 + +**Contract Reference**: `contracts/mcp-tools.json` - load_xer inputSchema + +**Test First** (TDD): +```python +# tests/contract/test_load_xer.py + +@pytest.mark.asyncio +async def test_load_xer_with_db_path_creates_file(tmp_path, sample_xer_file): + """load_xer with db_path creates persistent database file.""" + db_file = tmp_path / "schedule.db" + result = await load_xer(sample_xer_file, db_path=str(db_file)) + assert result["success"] is True + assert db_file.exists() + assert result["database"]["db_path"] == str(db_file) + assert result["database"]["is_persistent"] is True + +@pytest.mark.asyncio +async def test_load_xer_with_empty_db_path_auto_generates(sample_xer_file): + """load_xer with empty db_path generates path from XER filename.""" + result 
= await load_xer(sample_xer_file, db_path="") + assert result["success"] is True + expected_db = sample_xer_file.replace(".xer", ".sqlite") + assert result["database"]["db_path"] == expected_db + assert result["database"]["is_persistent"] is True + # Cleanup + os.unlink(expected_db) + +@pytest.mark.asyncio +async def test_load_xer_without_db_path_uses_memory(sample_xer_file): + """load_xer without db_path uses in-memory database (backward compatible).""" + result = await load_xer(sample_xer_file) + assert result["success"] is True + assert result["database"]["db_path"] == ":memory:" + assert result["database"]["is_persistent"] is False + +@pytest.mark.asyncio +async def test_load_xer_database_contains_all_data(tmp_path, sample_xer_file): + """Persistent database contains all parsed data.""" + db_file = tmp_path / "schedule.db" + result = await load_xer(sample_xer_file, db_path=str(db_file)) + + # Verify data via direct SQL + import sqlite3 + conn = sqlite3.connect(str(db_file)) + cursor = conn.execute("SELECT COUNT(*) FROM activities") + count = cursor.fetchone()[0] + conn.close() + + assert count == result["activity_count"] +``` + +**Implementation**: +- Modify `src/xer_mcp/tools/load_xer.py`: + - Add `db_path: str | None = None` parameter + - Pass `db_path` and `file_path` (as source_file) to `db.initialize()` + - Include `database` field in response with DatabaseInfo + +**Files Changed**: +- `src/xer_mcp/tools/load_xer.py` +- `tests/contract/test_load_xer.py` + +**Verification**: `pytest tests/contract/test_load_xer.py -v` + +**Acceptance**: load_xer creates persistent database when db_path provided + +--- + +### Task 3.2: Add Database Info to load_xer Response [X] + +**Type**: Implementation +**Why**: Response must include all info needed to connect to database +**Dependencies**: Task 3.1, Task 2.3 + +**Contract Reference**: `contracts/mcp-tools.json` - load_xer outputSchema.database + +**Test First** (TDD): +```python +# tests/contract/test_load_xer.py + 
+@pytest.mark.asyncio +async def test_load_xer_response_includes_database_info(tmp_path, sample_xer_file): + """load_xer response includes complete database info.""" + db_file = tmp_path / "schedule.db" + result = await load_xer(sample_xer_file, db_path=str(db_file)) + + assert "database" in result + db_info = result["database"] + assert "db_path" in db_info + assert "is_persistent" in db_info + assert "source_file" in db_info + assert "loaded_at" in db_info + assert "schema" in db_info + +@pytest.mark.asyncio +async def test_load_xer_response_schema_includes_tables(tmp_path, sample_xer_file): + """load_xer response schema includes table information.""" + db_file = tmp_path / "schedule.db" + result = await load_xer(sample_xer_file, db_path=str(db_file)) + + schema = result["database"]["schema"] + assert "version" in schema + assert "tables" in schema + table_names = [t["name"] for t in schema["tables"]] + assert "activities" in table_names + assert "relationships" in table_names +``` + +**Implementation**: +- Modify `src/xer_mcp/tools/load_xer.py`: + - After successful load, call `db.get_schema_info()` + - Build DatabaseInfo response structure + - Include in return dictionary + +**Files Changed**: +- `src/xer_mcp/tools/load_xer.py` +- `tests/contract/test_load_xer.py` + +**Verification**: `pytest tests/contract/test_load_xer.py -v` + +**Acceptance**: load_xer response includes complete DatabaseInfo with schema + +--- + +### Task 3.3: Register db_path Parameter with MCP Server [X] + +**Type**: Implementation +**Why**: MCP server must expose the new parameter to clients +**Dependencies**: Task 3.1 + +**Contract Reference**: `contracts/mcp-tools.json` - load_xer inputSchema + +**Test First** (TDD): +```python +# tests/contract/test_load_xer.py + +def test_load_xer_tool_schema_includes_db_path(): + """MCP tool schema includes db_path parameter.""" + from xer_mcp.server import server + tools = server.list_tools() + load_xer_tool = next(t for t in tools if t.name == 
"load_xer") + props = load_xer_tool.inputSchema["properties"] + assert "db_path" in props + assert props["db_path"]["type"] == "string" +``` + +**Implementation**: +- Modify MCP tool registration in `src/xer_mcp/server.py`: + - Add `db_path` to load_xer tool inputSchema + - Update tool handler to pass db_path to load_xer function + +**Files Changed**: +- `src/xer_mcp/server.py` +- `tests/contract/test_load_xer.py` + +**Verification**: `pytest tests/contract/test_load_xer.py -v` + +**Acceptance**: MCP server exposes db_path parameter for load_xer tool + +--- + +### Task 3.4: Handle Database Write Errors [X] + +**Type**: Implementation +**Why**: Clear error messages for write failures (FR-008) +**Dependencies**: Task 3.1 + +**Contract Reference**: `contracts/mcp-tools.json` - Error schema + +**Test First** (TDD): +```python +# tests/contract/test_load_xer.py + +@pytest.mark.asyncio +async def test_load_xer_error_on_unwritable_path(sample_xer_file): + """load_xer returns error for unwritable path.""" + result = await load_xer(sample_xer_file, db_path="/root/forbidden.db") + assert result["success"] is False + assert result["error"]["code"] == "FILE_NOT_WRITABLE" + +@pytest.mark.asyncio +async def test_load_xer_error_on_invalid_path(sample_xer_file): + """load_xer returns error for invalid path.""" + result = await load_xer(sample_xer_file, db_path="/nonexistent/dir/file.db") + assert result["success"] is False + assert result["error"]["code"] == "FILE_NOT_WRITABLE" +``` + +**Implementation**: +- Add error handling in load_xer for database creation failures: + - Catch PermissionError, or sqlite3.OperationalError "unable to open database file" (unwritable location or missing parent directory, as in both tests above) → FILE_NOT_WRITABLE + - Catch OSError with ENOSPC, or sqlite3.OperationalError "database or disk is full" → DISK_FULL + - Catch other sqlite3 errors → DATABASE_ERROR + +**Files Changed**: +- `src/xer_mcp/tools/load_xer.py` +- `src/xer_mcp/errors.py` (add new error classes if needed) +- `tests/contract/test_load_xer.py` + +**Verification**: `pytest tests/contract/test_load_xer.py -v` + +**Acceptance**: Clear error messages returned for all 
database write failures + +--- + +## Phase 4: User Story 2 - Retrieve Database Connection Information (P1) + +### Task 4.1: Create get_database_info Tool [X] + +**Type**: Implementation +**Why**: Allows retrieval of database info without reloading +**Dependencies**: Task 2.3 + +**Contract Reference**: `contracts/mcp-tools.json` - get_database_info + +**Test First** (TDD): +```python +# tests/contract/test_get_database_info.py + +@pytest.mark.asyncio +async def test_get_database_info_returns_current_database(tmp_path, sample_xer_file): + """get_database_info returns info about currently loaded database.""" + db_file = tmp_path / "schedule.db" + await load_xer(sample_xer_file, db_path=str(db_file)) + + result = await get_database_info() + assert "database" in result + assert result["database"]["db_path"] == str(db_file) + assert result["database"]["is_persistent"] is True + +@pytest.mark.asyncio +async def test_get_database_info_error_when_no_database(): + """get_database_info returns error when no database loaded.""" + # Reset database state + from xer_mcp.db import db + db.close() + + result = await get_database_info() + assert "error" in result + assert result["error"]["code"] == "NO_FILE_LOADED" + +@pytest.mark.asyncio +async def test_get_database_info_includes_schema(tmp_path, sample_xer_file): + """get_database_info includes schema information.""" + db_file = tmp_path / "schedule.db" + await load_xer(sample_xer_file, db_path=str(db_file)) + + result = await get_database_info() + assert "schema" in result["database"] + assert "tables" in result["database"]["schema"] +``` + +**Implementation**: +- Create `src/xer_mcp/tools/get_database_info.py`: + - Check if database is initialized + - Return NO_FILE_LOADED error if not + - Return DatabaseInfo structure with schema + +**Files Changed**: +- `src/xer_mcp/tools/get_database_info.py` (new) +- `tests/contract/test_get_database_info.py` (new) + +**Verification**: `pytest tests/contract/test_get_database_info.py -v` + 
+**Acceptance**: get_database_info returns complete DatabaseInfo or appropriate error + +--- + +### Task 4.2: Register get_database_info with MCP Server [X] + +**Type**: Implementation +**Why**: MCP server must expose the new tool to clients +**Dependencies**: Task 4.1 + +**Contract Reference**: `contracts/mcp-tools.json` - get_database_info + +**Test First** (TDD): +```python +# tests/contract/test_get_database_info.py + +def test_get_database_info_tool_registered(): + """get_database_info tool is registered with MCP server.""" + from xer_mcp.server import server + tools = server.list_tools() + tool_names = [t.name for t in tools] + assert "get_database_info" in tool_names +``` + +**Implementation**: +- Modify `src/xer_mcp/server.py`: + - Import get_database_info function + - Add tool definition with empty inputSchema + - Add handler for get_database_info calls + +**Files Changed**: +- `src/xer_mcp/server.py` +- `tests/contract/test_get_database_info.py` + +**Verification**: `pytest tests/contract/test_get_database_info.py -v` + +**Acceptance**: get_database_info tool accessible via MCP + +--- + +## Phase 5: User Story 3 - Query Database Schema Information (P2) + +### Task 5.1: Add Primary Key Information to Schema [X] + +**Type**: Implementation +**Why**: Helps developers understand table structure for queries +**Dependencies**: Task 2.3 + +**Contract Reference**: `contracts/mcp-tools.json` - TableInfo.primary_key + +**Test First** (TDD): +```python +# tests/unit/test_db_manager.py + +def test_schema_info_includes_primary_keys(): + """Schema info includes primary key for each table.""" + dm = DatabaseManager() + dm.initialize() + schema = dm.get_schema_info() + + activities_table = next(t for t in schema["tables"] if t["name"] == "activities") + assert "primary_key" in activities_table + assert "task_id" in activities_table["primary_key"] +``` + +**Implementation**: +- Enhance `get_schema_info()` in DatabaseManager: + - Use `PRAGMA table_info()` to identify 
PRIMARY KEY columns + - Add to TableInfo structure + +**Files Changed**: +- `src/xer_mcp/db/__init__.py` +- `tests/unit/test_db_manager.py` + +**Verification**: `pytest tests/unit/test_db_manager.py -v` + +**Acceptance**: Primary key information included in schema response + +--- + +### Task 5.2: Add Foreign Key Information to Schema [X] + +**Type**: Implementation +**Why**: Documents table relationships for complex queries +**Dependencies**: Task 5.1 + +**Contract Reference**: `contracts/mcp-tools.json` - ForeignKeyInfo + +**Test First** (TDD): +```python +# tests/unit/test_db_manager.py + +def test_schema_info_includes_foreign_keys(): + """Schema info includes foreign key relationships.""" + dm = DatabaseManager() + dm.initialize() + schema = dm.get_schema_info() + + activities_table = next(t for t in schema["tables"] if t["name"] == "activities") + assert "foreign_keys" in activities_table + # activities.proj_id -> projects.proj_id + fk = next((fk for fk in activities_table["foreign_keys"] + if fk["column"] == "proj_id"), None) + assert fk is not None + assert fk["references_table"] == "projects" + assert fk["references_column"] == "proj_id" +``` + +**Implementation**: +- Enhance `get_schema_info()` in DatabaseManager: + - Use `PRAGMA foreign_key_list(table_name)` to get FK relationships + - Add to TableInfo structure + +**Files Changed**: +- `src/xer_mcp/db/__init__.py` +- `tests/unit/test_db_manager.py` + +**Verification**: `pytest tests/unit/test_db_manager.py -v` + +**Acceptance**: Foreign key information included in schema response + +--- + +## Phase 6: Polish + +### Task 6.1: Integration Test - External Script Access [X] + +**Type**: Testing +**Why**: Validates end-to-end workflow matches quickstart documentation +**Dependencies**: All previous tasks + +**Contract Reference**: `quickstart.md` - Python Example + +**Test**: +```python +# tests/integration/test_direct_db_access.py + +@pytest.mark.asyncio +async def 
test_external_script_can_query_database(tmp_path, sample_xer_file): + """External script can query database using returned path.""" + db_file = tmp_path / "schedule.db" + result = await load_xer(sample_xer_file, db_path=str(db_file)) + + # Simulate external script access (as shown in quickstart.md) + import sqlite3 + db_path = result["database"]["db_path"] + + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + + # Query milestones + cursor = conn.execute(""" + SELECT task_code, task_name, target_start_date, milestone_type + FROM activities + WHERE task_type IN ('TT_Mile', 'TT_FinMile') + ORDER BY target_start_date + """) + + milestones = cursor.fetchall() + conn.close() + + assert len(milestones) > 0 + assert all(row["task_code"] for row in milestones) +``` + +**Files Changed**: +- `tests/integration/test_direct_db_access.py` (new) + +**Verification**: `pytest tests/integration/test_direct_db_access.py -v` + +**Acceptance**: External script workflow matches quickstart documentation + +--- + +### Task 6.2: Update Version to 0.2.0 [X] + +**Type**: Configuration +**Why**: Semantic versioning for new feature release +**Dependencies**: All previous tasks + +**Steps**: +1. Update version in `pyproject.toml` to `0.2.0` +2. Update version in schema introspection response to `0.2.0` +3. Update any version references in documentation + +**Files Changed**: +- `pyproject.toml` +- `src/xer_mcp/db/__init__.py` (SCHEMA_VERSION constant) + +**Verification**: `grep -r "0.2.0" pyproject.toml src/` + +**Acceptance**: Version consistently shows 0.2.0 across project + +--- + +### Task 6.3: Run Full Test Suite and Linting [X] + +**Type**: Verification +**Why**: Ensure all tests pass and code meets standards +**Dependencies**: All previous tasks + +**Steps**: +1. Run `pytest` - all tests must pass +2. Run `ruff check .` - no linting errors +3. Run `ruff format --check .` - code properly formatted + +**Verification**: +```bash +pytest +ruff check . +ruff format --check . 
+``` + +**Acceptance**: All tests pass, no linting errors, code properly formatted + +--- + +### Task 6.4: Commit and Prepare for Merge [X] + +**Type**: Git Operations +**Why**: Prepare feature for merge to main branch +**Dependencies**: Task 6.3 + +**Steps**: +1. Review all changes with `git diff main` +2. Commit any uncommitted changes with descriptive messages +3. Verify branch is ready for PR/merge + +**Verification**: `git status` shows clean working directory + +**Acceptance**: All changes committed, branch ready for merge + +--- + +## Summary + +| Phase | Tasks | Focus | +|-------|-------|-------| +| Phase 1 | 1 | Setup and verification | +| Phase 2 | 3 | DatabaseManager foundation | +| Phase 3 | 4 | US1 - Load to persistent DB (P1) | +| Phase 4 | 2 | US2 - Retrieve DB info (P1) | +| Phase 5 | 2 | US3 - Schema information (P2) | +| Phase 6 | 4 | Integration, versioning, polish | + +**Total Tasks**: 16 +**Estimated Test Count**: ~25 new tests diff --git a/src/xer_mcp/db/__init__.py b/src/xer_mcp/db/__init__.py index f911a0d..e2f7283 100644 --- a/src/xer_mcp/db/__init__.py +++ b/src/xer_mcp/db/__init__.py @@ -1,24 +1,66 @@ """Database connection management for XER MCP Server.""" +import os import sqlite3 from collections.abc import Generator from contextlib import contextmanager +from datetime import datetime +from pathlib import Path from xer_mcp.db.schema import get_schema +# Schema version for introspection responses +SCHEMA_VERSION = "0.2.0" + class DatabaseManager: """Manages SQLite database connections and schema initialization.""" def __init__(self) -> None: - """Initialize database manager with in-memory database.""" + """Initialize database manager.""" self._connection: sqlite3.Connection | None = None + self._db_path: str = ":memory:" + self._source_file: str | None = None + self._loaded_at: datetime | None = None - def initialize(self) -> None: - """Initialize the in-memory database with schema.""" - self._connection = sqlite3.connect(":memory:", 
check_same_thread=False) - self._connection.row_factory = sqlite3.Row - self._connection.executescript(get_schema()) + def initialize( + self, + db_path: str | None = None, + source_file: str | None = None, + ) -> None: + """Initialize the database with schema. + + Instance state (db_path, source_file, loaded_at) is only updated once the + database has been created, so a failed call never leaves the manager + describing a database that does not exist. + + Args: + db_path: Path for database file. If None or omitted, uses in-memory. + If empty string, auto-generates from source_file. + source_file: Path to the XER file being loaded (for tracking). + + Raises: + Exception: Any error raised while creating the database is re-raised + after the previous db_path has been restored. + """ + # Determine database path + if db_path is None: + # Default: in-memory database + target_path = ":memory:" + elif db_path == "": + # Auto-generate from source file (falls back to in-memory without one) + target_path = str(Path(source_file).with_suffix(".sqlite")) if source_file else ":memory:" + else: + # Use provided path + target_path = db_path + + # _create_file_database() reads self._db_path, so switch it before the + # call and roll it back if creation fails. + previous_path = self._db_path + self._db_path = target_path + try: + # Create database + if target_path == ":memory:": + connection = sqlite3.connect(":memory:", check_same_thread=False) + connection.row_factory = sqlite3.Row + connection.executescript(get_schema()) + # Publish the connection only after the schema was applied + self._connection = connection + else: + # File-based database with atomic write pattern + self._create_file_database() + except Exception: + self._db_path = previous_path + raise + + # Record load metadata only for a database that actually exists + self._source_file = source_file + self._loaded_at = datetime.now() def clear(self) -> None: """Clear all data from the database.""" @@ -61,6 +103,159 @@ class DatabaseManager: """Check if the database is initialized.""" return self._connection is not None + @property + def db_path(self) -> str: + """Get the database path.""" + return self._db_path + + @property + def is_persistent(self) -> bool: + """Check if the database is file-based (persistent).""" + return self._db_path != ":memory:" + + @property + def source_file(self) -> str | None: + """Get the source XER file path.""" + return self._source_file + + @property + def loaded_at(self) -> datetime | None: + """Get the timestamp when data was loaded.""" + return self._loaded_at + + def _create_file_database(self) -> None: + """Create a file-based database with atomic write pattern.""" + temp_path
= self._db_path + ".tmp" + + try: + # Drop a leftover temp file from an interrupted run so stale content + # is never promoted to the final database. + if os.path.exists(temp_path): + os.unlink(temp_path) + + # Create database at temp path; always close the handle so a failed + # schema script cannot leak it or block deletion of the temp file. + conn = sqlite3.connect(temp_path, check_same_thread=False) + try: + conn.executescript(get_schema()) + conn.commit() + finally: + conn.close() + + # Close any previous connection before swapping files so it is not + # left open on a database file that is about to be replaced. + if self._connection is not None: + self._connection.close() + self._connection = None + + # os.replace() overwrites the target in a single step, so readers + # never observe a missing database file (the previous + # unlink()+rename() sequence left such a window and lost the old + # data if the rename failed). + os.replace(temp_path, self._db_path) + + # Open final database with WAL mode + self._connection = sqlite3.connect(self._db_path, check_same_thread=False) + self._connection.row_factory = sqlite3.Row + self._connection.execute("PRAGMA journal_mode=WAL") + except Exception: + # Clean up temp file on failure + if os.path.exists(temp_path): + os.unlink(temp_path) + raise + + def get_schema_info(self) -> dict: + """Get database schema information for introspection. + + Returns: + Dictionary with schema version and table information. + """ + if self._connection is None: + raise RuntimeError("Database not initialized. Call initialize() first.") + + tables = [] + + # Get all tables (excluding sqlite internal tables) + cursor = self._connection.execute( + "SELECT name FROM sqlite_master WHERE type='table' " + "AND name NOT LIKE 'sqlite_%' ORDER BY name" + ) + table_names = [row[0] for row in cursor.fetchall()] + + for table_name in table_names: + table_info = self._get_table_info(table_name) + tables.append(table_info) + + return { + "version": SCHEMA_VERSION, + "tables": tables, + } + + def _get_table_info(self, table_name: str) -> dict: + """Get detailed information about a table. + + Args: + table_name: Name of the table. + + Returns: + Dictionary with table name, columns, primary keys, foreign keys, row count. 
+ """ + columns = [] + pk_columns = [] + + # Quote the identifier so table names with spaces or other special + # characters (e.g. tables added to the file by external scripts) work. + quoted_name = '"{}"'.format(table_name.replace('"', '""')) + + # Get column info + cursor = self._connection.execute(f"PRAGMA table_info({quoted_name})") # noqa: S608 + for row in cursor.fetchall(): + col_name = row[1] + col_type = row[2] or "TEXT" + not_null = bool(row[3]) + default_val = row[4] + # pk is 0 for non-key columns, else the 1-based position within the key + pk_position = row[5] + + col_info: dict = { + "name": col_name, + "type": col_type, + "nullable": not not_null, + } + if default_val is not None: + col_info["default"] = str(default_val) + + columns.append(col_info) + + if pk_position: + pk_columns.append((pk_position, col_name)) + + # List composite primary keys in key order rather than column order + primary_key = [name for _, name in sorted(pk_columns)] + + # Get foreign keys + foreign_keys = [] + cursor = self._connection.execute( + f"PRAGMA foreign_key_list({quoted_name})" # noqa: S608 + ) + for row in cursor.fetchall(): + fk_info = { + "column": row[3], # from column + "references_table": row[2], # table + "references_column": row[4], # to column + } + foreign_keys.append(fk_info) + + # Get row count + cursor = self._connection.execute( + f"SELECT COUNT(*) FROM {quoted_name}" # noqa: S608 + ) + row_count = cursor.fetchone()[0] + + return { + "name": table_name, + "columns": columns, + "primary_key": primary_key, + "foreign_keys": foreign_keys, + "row_count": row_count, + } + + def get_database_info(self) -> dict: + """Get complete database information for API responses. + + Returns: + Dictionary with database path, persistence status, source file, + loaded timestamp, and schema information. + """ + if not self.is_initialized: + raise RuntimeError("Database not initialized. 
Call initialize() first.") + + return { + "db_path": self._db_path, + "is_persistent": self.is_persistent, + "source_file": self._source_file, + "loaded_at": self._loaded_at.isoformat() if self._loaded_at else None, + "schema": self.get_schema_info(), + } + # Global database manager instance db = DatabaseManager() diff --git a/src/xer_mcp/db/queries.py b/src/xer_mcp/db/queries.py index 4b47d32..dd56ed4 100644 --- a/src/xer_mcp/db/queries.py +++ b/src/xer_mcp/db/queries.py @@ -239,16 +239,18 @@ def query_relationships( lag_hours=row[6] or 0.0, pred_type=pred_type, ) - relationships.append({ - "task_pred_id": row[0], - "task_id": row[1], - "task_name": row[2], - "pred_task_id": row[3], - "pred_task_name": row[4], - "pred_type": pred_type, - "lag_hr_cnt": row[6], - "driving": driving, - }) + relationships.append( + { + "task_pred_id": row[0], + "task_id": row[1], + "task_name": row[2], + "pred_task_id": row[3], + "pred_task_name": row[4], + "pred_type": pred_type, + "lag_hr_cnt": row[6], + "driving": driving, + } + ) return relationships, total @@ -298,14 +300,16 @@ def get_predecessors(activity_id: str) -> list[dict]: lag_hours=row[4] or 0.0, pred_type=pred_type, ) - result.append({ - "task_id": row[0], - "task_code": row[1], - "task_name": row[2], - "relationship_type": pred_type, - "lag_hr_cnt": row[4], - "driving": driving, - }) + result.append( + { + "task_id": row[0], + "task_code": row[1], + "task_name": row[2], + "relationship_type": pred_type, + "lag_hr_cnt": row[4], + "driving": driving, + } + ) return result @@ -355,14 +359,16 @@ def get_successors(activity_id: str) -> list[dict]: lag_hours=row[4] or 0.0, pred_type=pred_type, ) - result.append({ - "task_id": row[0], - "task_code": row[1], - "task_name": row[2], - "relationship_type": pred_type, - "lag_hr_cnt": row[4], - "driving": driving, - }) + result.append( + { + "task_id": row[0], + "task_code": row[1], + "task_name": row[2], + "relationship_type": pred_type, + "lag_hr_cnt": row[4], + "driving": 
driving, + } + ) return result diff --git a/src/xer_mcp/errors.py b/src/xer_mcp/errors.py index a9af745..3c1fd8b 100644 --- a/src/xer_mcp/errors.py +++ b/src/xer_mcp/errors.py @@ -56,3 +56,30 @@ class ActivityNotFoundError(XerMcpError): "ACTIVITY_NOT_FOUND", f"Activity not found: {activity_id}", ) + + +class FileNotWritableError(XerMcpError): + """Raised when the database file path is not writable.""" + + def __init__(self, path: str, reason: str = "") -> None: + msg = f"Cannot write to database file: {path}" + if reason: + msg = f"{msg} ({reason})" + super().__init__("FILE_NOT_WRITABLE", msg) + + +class DiskFullError(XerMcpError): + """Raised when there is insufficient disk space.""" + + def __init__(self, path: str) -> None: + super().__init__( + "DISK_FULL", + f"Insufficient disk space to create database: {path}", + ) + + +class DatabaseError(XerMcpError): + """Raised for general database errors.""" + + def __init__(self, message: str) -> None: + super().__init__("DATABASE_ERROR", message) diff --git a/src/xer_mcp/server.py b/src/xer_mcp/server.py index 3d9b7b1..a4934c1 100644 --- a/src/xer_mcp/server.py +++ b/src/xer_mcp/server.py @@ -50,6 +50,12 @@ async def list_tools() -> list[Tool]: "type": "string", "description": "Project ID to select (required for multi-project files)", }, + "db_path": { + "type": "string", + "description": "Path for persistent SQLite database file. " + "If omitted, uses in-memory database. " + "If empty string, auto-generates path from XER filename (same directory, .sqlite extension).", + }, }, "required": ["file_path"], }, @@ -183,6 +189,15 @@ async def list_tools() -> list[Tool]: "properties": {}, }, ), + Tool( + name="get_database_info", + description="Get information about the currently loaded database including file path and schema. 
" + "Use this to get connection details for direct SQL access.", + inputSchema={ + "type": "object", + "properties": {}, + }, + ), ] @@ -197,6 +212,7 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]: result = await load_xer( file_path=arguments["file_path"], project_id=arguments.get("project_id"), + db_path=arguments.get("db_path"), ) return [TextContent(type="text", text=json.dumps(result, indent=2))] @@ -258,6 +274,12 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]: result = await get_critical_path() return [TextContent(type="text", text=json.dumps(result, indent=2))] + if name == "get_database_info": + from xer_mcp.tools.get_database_info import get_database_info + + result = await get_database_info() + return [TextContent(type="text", text=json.dumps(result, indent=2))] + raise ValueError(f"Unknown tool: {name}") diff --git a/src/xer_mcp/tools/get_database_info.py b/src/xer_mcp/tools/get_database_info.py new file mode 100644 index 0000000..2b5db87 --- /dev/null +++ b/src/xer_mcp/tools/get_database_info.py @@ -0,0 +1,25 @@ +"""get_database_info MCP tool implementation.""" + +from xer_mcp.db import db + + +async def get_database_info() -> dict: + """Get information about the currently loaded database. + + Returns connection details for direct SQL access including + database path, schema information, and metadata. + + Returns: + Dictionary with database info or error if no database loaded + """ + if not db.is_initialized: + return { + "error": { + "code": "NO_FILE_LOADED", + "message": "No XER file is loaded. 
Use the load_xer tool first.", + } + } + + return { + "database": db.get_database_info(), + } diff --git a/src/xer_mcp/tools/load_xer.py b/src/xer_mcp/tools/load_xer.py index fe281d6..204d32d 100644 --- a/src/xer_mcp/tools/load_xer.py +++ b/src/xer_mcp/tools/load_xer.py @@ -1,5 +1,9 @@ """load_xer MCP tool implementation.""" +import errno +import os +import sqlite3 + from xer_mcp.db import db from xer_mcp.db.loader import get_activity_count, get_relationship_count, load_parsed_data from xer_mcp.errors import FileNotFoundError, ParseError @@ -7,19 +11,55 @@ from xer_mcp.parser.xer_parser import XerParser from xer_mcp.server import set_file_loaded -async def load_xer(file_path: str, project_id: str | None = None) -> dict: +async def load_xer( + file_path: str, + project_id: str | None = None, + db_path: str | None = None, +) -> dict: """Load a Primavera P6 XER file and parse its schedule data. Args: file_path: Absolute path to the XER file project_id: Project ID to select (required for multi-project files) + db_path: Path for persistent database file. If None, uses in-memory. + If empty string, auto-generates from XER filename. 
Returns: Dictionary with success status and project info or error details """ - # Ensure database is initialized - if not db.is_initialized: - db.initialize() + # Initialize database with specified path + try: + db.initialize(db_path=db_path, source_file=file_path) + except PermissionError: + target = db_path if db_path else file_path + return { + "success": False, + "error": {"code": "FILE_NOT_WRITABLE", "message": f"Cannot write database: {target}"}, + } + except OSError as e: + target = db_path if db_path else file_path + if e.errno == errno.ENOSPC: + return { + "success": False, + "error": {"code": "DISK_FULL", "message": f"Insufficient disk space: {target}"}, + } + if e.errno == errno.ENOENT: + return { + "success": False, + "error": { + "code": "FILE_NOT_WRITABLE", + "message": f"Directory does not exist: {os.path.dirname(target)}", + }, + } + return { + "success": False, + "error": {"code": "DATABASE_ERROR", "message": str(e)}, + } + except sqlite3.Error as e: + return { + "success": False, + "error": {"code": "DATABASE_ERROR", "message": str(e)}, + } parser = XerParser() @@ -73,6 +113,9 @@ async def load_xer(file_path: str, project_id: str | None = None) -> dict: activity_count = get_activity_count() relationship_count = get_relationship_count() + # Get database info + database_info = db.get_database_info() + return { "success": True, "project": { @@ -83,4 +126,5 @@ async def load_xer(file_path: str, project_id: str | None = None) -> dict: }, "activity_count": activity_count, "relationship_count": relationship_count, + "database": database_info, } diff --git a/tests/contract/test_get_database_info.py b/tests/contract/test_get_database_info.py new file mode 100644 index 0000000..dd4d5d1 --- /dev/null +++ b/tests/contract/test_get_database_info.py @@ -0,0 +1,143 @@ +"""Contract tests for get_database_info MCP tool.""" + +from pathlib import Path + +import pytest + +from xer_mcp.db import db + + +@pytest.fixture(autouse=True) +def setup_db(): + """Reset 
database state for each test.""" + if db.is_initialized: + db.close() + yield + if db.is_initialized: + db.close() + + +class TestGetDatabaseInfoContract: + """Contract tests verifying get_database_info tool interface.""" + + async def test_get_database_info_returns_current_database( + self, tmp_path: Path, sample_xer_single_project: Path + ) -> None: + """get_database_info returns info about currently loaded database.""" + from xer_mcp.tools.get_database_info import get_database_info + from xer_mcp.tools.load_xer import load_xer + + db_file = tmp_path / "schedule.db" + await load_xer( + file_path=str(sample_xer_single_project), + db_path=str(db_file), + ) + + result = await get_database_info() + + assert "database" in result + assert result["database"]["db_path"] == str(db_file) + assert result["database"]["is_persistent"] is True + + async def test_get_database_info_error_when_no_database(self) -> None: + """get_database_info returns error when no database loaded.""" + from xer_mcp.tools.get_database_info import get_database_info + + # Ensure database is not initialized + if db.is_initialized: + db.close() + + result = await get_database_info() + + assert "error" in result + assert result["error"]["code"] == "NO_FILE_LOADED" + + async def test_get_database_info_includes_schema( + self, tmp_path: Path, sample_xer_single_project: Path + ) -> None: + """get_database_info includes schema information.""" + from xer_mcp.tools.get_database_info import get_database_info + from xer_mcp.tools.load_xer import load_xer + + db_file = tmp_path / "schedule.db" + await load_xer( + file_path=str(sample_xer_single_project), + db_path=str(db_file), + ) + + result = await get_database_info() + + assert "schema" in result["database"] + assert "tables" in result["database"]["schema"] + + async def test_get_database_info_includes_loaded_at( + self, tmp_path: Path, sample_xer_single_project: Path + ) -> None: + """get_database_info includes loaded_at timestamp.""" + from 
xer_mcp.tools.get_database_info import get_database_info + from xer_mcp.tools.load_xer import load_xer + + db_file = tmp_path / "schedule.db" + await load_xer( + file_path=str(sample_xer_single_project), + db_path=str(db_file), + ) + + result = await get_database_info() + + assert "loaded_at" in result["database"] + # Should be ISO format timestamp + assert "T" in result["database"]["loaded_at"] + + async def test_get_database_info_includes_source_file( + self, tmp_path: Path, sample_xer_single_project: Path + ) -> None: + """get_database_info includes source XER file path.""" + from xer_mcp.tools.get_database_info import get_database_info + from xer_mcp.tools.load_xer import load_xer + + db_file = tmp_path / "schedule.db" + await load_xer( + file_path=str(sample_xer_single_project), + db_path=str(db_file), + ) + + result = await get_database_info() + + assert result["database"]["source_file"] == str(sample_xer_single_project) + + async def test_get_database_info_for_memory_database( + self, sample_xer_single_project: Path + ) -> None: + """get_database_info works for in-memory database.""" + from xer_mcp.tools.get_database_info import get_database_info + from xer_mcp.tools.load_xer import load_xer + + await load_xer(file_path=str(sample_xer_single_project)) + + result = await get_database_info() + + assert "database" in result + assert result["database"]["db_path"] == ":memory:" + assert result["database"]["is_persistent"] is False + + +class TestGetDatabaseInfoToolSchema: + """Tests for MCP tool schema.""" + + async def test_get_database_info_tool_registered(self) -> None: + """get_database_info tool is registered with MCP server.""" + from xer_mcp.server import list_tools + + tools = await list_tools() + tool_names = [t.name for t in tools] + assert "get_database_info" in tool_names + + async def test_get_database_info_tool_has_empty_input_schema(self) -> None: + """get_database_info tool has no required inputs.""" + from xer_mcp.server import list_tools + + 
tools = await list_tools() + tool = next(t for t in tools if t.name == "get_database_info") + # Should have empty or no required properties + assert "required" not in tool.inputSchema or len(tool.inputSchema.get("required", [])) == 0 diff --git a/tests/contract/test_load_xer.py b/tests/contract/test_load_xer.py index 6f1eafa..3514d4d 100644 --- a/tests/contract/test_load_xer.py +++ b/tests/contract/test_load_xer.py @@ -1,5 +1,6 @@ """Contract tests for load_xer MCP tool.""" +import sqlite3 from pathlib import Path import pytest @@ -9,10 +10,14 @@ from xer_mcp.db import db @pytest.fixture(autouse=True) def setup_db(): - """Initialize and clear database for each test.""" - db.initialize() + """Reset database state for each test.""" + # Close any existing connection + if db.is_initialized: + db.close() yield - db.clear() + # Cleanup after test + if db.is_initialized: + db.close() class TestLoadXerContract: @@ -105,3 +110,141 @@ class TestLoadXerContract: assert "plan_end_date" in result["project"] # Dates should be ISO8601 format assert "T" in result["project"]["plan_start_date"] + + +class TestLoadXerPersistentDatabase: + """Contract tests for persistent database functionality.""" + + async def test_load_xer_with_db_path_creates_file( + self, tmp_path: Path, sample_xer_single_project: Path + ) -> None: + """load_xer with db_path creates persistent database file.""" + from xer_mcp.tools.load_xer import load_xer + + db_file = tmp_path / "schedule.db" + result = await load_xer( + file_path=str(sample_xer_single_project), + db_path=str(db_file), + ) + + assert result["success"] is True + assert db_file.exists() + assert result["database"]["db_path"] == str(db_file) + assert result["database"]["is_persistent"] is True + + async def test_load_xer_with_empty_db_path_auto_generates(self, tmp_path: Path) -> None: + """load_xer with empty db_path generates path from XER filename.""" + from xer_mcp.tools.load_xer import load_xer + + # Create XER file in tmp_path + xer_file = 
tmp_path / "my_schedule.xer" + from tests.conftest import SAMPLE_XER_SINGLE_PROJECT + + xer_file.write_text(SAMPLE_XER_SINGLE_PROJECT) + + result = await load_xer(file_path=str(xer_file), db_path="") + + assert result["success"] is True + expected_db = str(tmp_path / "my_schedule.sqlite") + assert result["database"]["db_path"] == expected_db + assert result["database"]["is_persistent"] is True + assert Path(expected_db).exists() + + async def test_load_xer_without_db_path_uses_memory( + self, sample_xer_single_project: Path + ) -> None: + """load_xer without db_path uses in-memory database (backward compatible).""" + from xer_mcp.tools.load_xer import load_xer + + result = await load_xer(file_path=str(sample_xer_single_project)) + + assert result["success"] is True + assert result["database"]["db_path"] == ":memory:" + assert result["database"]["is_persistent"] is False + + async def test_load_xer_database_contains_all_data( + self, tmp_path: Path, sample_xer_single_project: Path + ) -> None: + """Persistent database contains all parsed data.""" + from xer_mcp.tools.load_xer import load_xer + + db_file = tmp_path / "schedule.db" + result = await load_xer( + file_path=str(sample_xer_single_project), + db_path=str(db_file), + ) + + # Verify data via direct SQL + conn = sqlite3.connect(str(db_file)) + cursor = conn.execute("SELECT COUNT(*) FROM activities") + count = cursor.fetchone()[0] + conn.close() + + assert count == result["activity_count"] + + async def test_load_xer_response_includes_database_info( + self, tmp_path: Path, sample_xer_single_project: Path + ) -> None: + """load_xer response includes complete database info.""" + from xer_mcp.tools.load_xer import load_xer + + db_file = tmp_path / "schedule.db" + result = await load_xer( + file_path=str(sample_xer_single_project), + db_path=str(db_file), + ) + + assert "database" in result + db_info = result["database"] + assert "db_path" in db_info + assert "is_persistent" in db_info + assert "source_file" in 
db_info + assert "loaded_at" in db_info + assert "schema" in db_info + + async def test_load_xer_response_schema_includes_tables( + self, tmp_path: Path, sample_xer_single_project: Path + ) -> None: + """load_xer response schema includes table information.""" + from xer_mcp.tools.load_xer import load_xer + + db_file = tmp_path / "schedule.db" + result = await load_xer( + file_path=str(sample_xer_single_project), + db_path=str(db_file), + ) + + schema = result["database"]["schema"] + assert "version" in schema + assert "tables" in schema + table_names = [t["name"] for t in schema["tables"]] + assert "activities" in table_names + assert "relationships" in table_names + + async def test_load_xer_error_on_invalid_path(self, sample_xer_single_project: Path) -> None: + """load_xer returns error for invalid path.""" + from xer_mcp.tools.load_xer import load_xer + + result = await load_xer( + file_path=str(sample_xer_single_project), + db_path="/nonexistent/dir/file.db", + ) + + assert result["success"] is False + # Either FILE_NOT_WRITABLE or DATABASE_ERROR is acceptable + # depending on how SQLite reports the error + assert result["error"]["code"] in ("FILE_NOT_WRITABLE", "DATABASE_ERROR") + + +class TestLoadXerToolSchema: + """Tests for MCP tool schema.""" + + async def test_load_xer_tool_schema_includes_db_path(self) -> None: + """MCP tool schema includes db_path parameter.""" + from xer_mcp.server import list_tools + + tools = await list_tools() + load_xer_tool = next(t for t in tools if t.name == "load_xer") + props = load_xer_tool.inputSchema["properties"] + assert "db_path" in props + assert props["db_path"]["type"] == "string" diff --git a/tests/integration/test_direct_db_access.py b/tests/integration/test_direct_db_access.py new file mode 100644 index 0000000..298af0c --- /dev/null +++ b/tests/integration/test_direct_db_access.py @@ -0,0 +1,185 @@ +"""Integration tests for direct database access feature.""" + +import sqlite3 +from pathlib import Path + +import 
pytest + +from xer_mcp.db import db + + +@pytest.fixture(autouse=True) +def setup_db(): + """Reset database state for each test.""" + if db.is_initialized: + db.close() + yield + if db.is_initialized: + db.close() + + +class TestDirectDatabaseAccess: + """Integration tests verifying external script can access database.""" + + async def test_external_script_can_query_database( + self, tmp_path: Path, sample_xer_single_project: Path + ) -> None: + """External script can query database using returned path.""" + from xer_mcp.tools.load_xer import load_xer + + db_file = tmp_path / "schedule.db" + result = await load_xer( + file_path=str(sample_xer_single_project), + db_path=str(db_file), + ) + + # Simulate external script access (as shown in quickstart.md) + db_path = result["database"]["db_path"] + + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + + # Query milestones + cursor = conn.execute(""" + SELECT task_code, task_name, target_start_date, milestone_type + FROM activities + WHERE task_type IN ('TT_Mile', 'TT_FinMile') + ORDER BY target_start_date + """) + + milestones = cursor.fetchall() + conn.close() + + assert len(milestones) > 0 + assert all(row["task_code"] for row in milestones) + + async def test_external_script_can_query_critical_path( + self, tmp_path: Path, sample_xer_single_project: Path + ) -> None: + """External script can query critical path activities.""" + from xer_mcp.tools.load_xer import load_xer + + db_file = tmp_path / "schedule.db" + result = await load_xer( + file_path=str(sample_xer_single_project), + db_path=str(db_file), + ) + + db_path = result["database"]["db_path"] + + conn = sqlite3.connect(db_path) + cursor = conn.execute(""" + SELECT task_code, task_name, target_start_date, target_end_date + FROM activities + WHERE driving_path_flag = 1 + ORDER BY target_start_date + """) + + critical_activities = cursor.fetchall() + conn.close() + + assert len(critical_activities) > 0 + + async def 
test_external_script_can_join_tables( + self, tmp_path: Path, sample_xer_single_project: Path + ) -> None: + """External script can join activities with WBS.""" + from xer_mcp.tools.load_xer import load_xer + + db_file = tmp_path / "schedule.db" + result = await load_xer( + file_path=str(sample_xer_single_project), + db_path=str(db_file), + ) + + db_path = result["database"]["db_path"] + + conn = sqlite3.connect(db_path) + cursor = conn.execute(""" + SELECT a.task_code, a.task_name, w.wbs_name + FROM activities a + JOIN wbs w ON a.wbs_id = w.wbs_id + LIMIT 10 + """) + + joined_rows = cursor.fetchall() + conn.close() + + assert len(joined_rows) > 0 + + async def test_database_accessible_after_mcp_load( + self, tmp_path: Path, sample_xer_single_project: Path + ) -> None: + """Database remains accessible while MCP tools are active.""" + from xer_mcp.tools.load_xer import load_xer + + db_file = tmp_path / "schedule.db" + result = await load_xer( + file_path=str(sample_xer_single_project), + db_path=str(db_file), + ) + loaded_count = result["activity_count"] + + # External script queries database + conn = sqlite3.connect(str(db_file)) + cursor = conn.execute("SELECT COUNT(*) FROM activities") + external_count = cursor.fetchone()[0] + conn.close() + + # Both should match + assert external_count == loaded_count + + async def test_schema_info_matches_actual_database( + self, tmp_path: Path, sample_xer_single_project: Path + ) -> None: + """Returned schema info matches actual database structure.""" + from xer_mcp.tools.load_xer import load_xer + + db_file = tmp_path / "schedule.db" + result = await load_xer( + file_path=str(sample_xer_single_project), + db_path=str(db_file), + ) + + schema = result["database"]["schema"] + db_path = result["database"]["db_path"] + + # Verify tables exist in actual database + conn = sqlite3.connect(db_path) + cursor = conn.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'" + ) + actual_tables = {row[0] 
for row in cursor.fetchall()} + conn.close() + + schema_tables = {t["name"] for t in schema["tables"]} + assert schema_tables == actual_tables + + async def test_row_counts_match_actual_data( + self, tmp_path: Path, sample_xer_single_project: Path + ) -> None: + """Schema row counts match actual database row counts.""" + from xer_mcp.tools.load_xer import load_xer + + db_file = tmp_path / "schedule.db" + result = await load_xer( + file_path=str(sample_xer_single_project), + db_path=str(db_file), + ) + + schema = result["database"]["schema"] + db_path = result["database"]["db_path"] + + conn = sqlite3.connect(db_path) + + for table_info in schema["tables"]: + cursor = conn.execute( + f"SELECT COUNT(*) FROM {table_info['name']}" # noqa: S608 + ) + actual_count = cursor.fetchone()[0] + assert table_info["row_count"] == actual_count, ( + f"Table {table_info['name']}: expected {table_info['row_count']}, " + f"got {actual_count}" + ) + + conn.close() diff --git a/tests/unit/test_db_manager.py b/tests/unit/test_db_manager.py new file mode 100644 index 0000000..140bf25 --- /dev/null +++ b/tests/unit/test_db_manager.py @@ -0,0 +1,234 @@ +"""Unit tests for DatabaseManager file-based database support.""" + +import sqlite3 +from datetime import datetime +from pathlib import Path + +from xer_mcp.db import DatabaseManager + + +class TestDatabaseManagerInitialization: + """Tests for DatabaseManager initialization modes.""" + + def test_initialize_with_memory_by_default(self) -> None: + """Default initialization uses in-memory database.""" + dm = DatabaseManager() + dm.initialize() + assert dm.db_path == ":memory:" + assert dm.is_persistent is False + dm.close() + + def test_initialize_with_file_path(self, tmp_path: Path) -> None: + """Can initialize with explicit file path.""" + db_file = tmp_path / "test.db" + dm = DatabaseManager() + dm.initialize(db_path=str(db_file)) + assert dm.db_path == str(db_file) + assert dm.is_persistent is True + assert db_file.exists() + dm.close() + 
+ def test_initialize_with_empty_string_auto_generates_path(self, tmp_path: Path) -> None: + """Empty string db_path with source_file auto-generates path.""" + xer_file = tmp_path / "schedule.xer" + xer_file.write_text("dummy content") + + dm = DatabaseManager() + dm.initialize(db_path="", source_file=str(xer_file)) + expected_db = str(tmp_path / "schedule.sqlite") + assert dm.db_path == expected_db + assert dm.is_persistent is True + assert Path(expected_db).exists() + dm.close() + + def test_file_database_persists_after_close(self, tmp_path: Path) -> None: + """File-based database persists after connection close.""" + db_file = tmp_path / "persist_test.db" + dm = DatabaseManager() + dm.initialize(db_path=str(db_file)) + + # Insert test data + with dm.cursor() as cur: + cur.execute( + "INSERT INTO projects (proj_id, proj_short_name, loaded_at) " + "VALUES ('P1', 'Test', datetime('now'))" + ) + dm.commit() + dm.close() + + # Verify file exists and has data + assert db_file.exists() + conn = sqlite3.connect(str(db_file)) + cursor = conn.execute("SELECT proj_id FROM projects") + rows = cursor.fetchall() + conn.close() + assert len(rows) == 1 + assert rows[0][0] == "P1" + + def test_source_file_tracked(self, tmp_path: Path) -> None: + """Source file path is tracked when provided.""" + db_file = tmp_path / "test.db" + xer_file = tmp_path / "schedule.xer" + xer_file.write_text("dummy") + + dm = DatabaseManager() + dm.initialize(db_path=str(db_file), source_file=str(xer_file)) + assert dm.source_file == str(xer_file) + dm.close() + + def test_loaded_at_timestamp(self, tmp_path: Path) -> None: + """Loaded_at timestamp is recorded.""" + db_file = tmp_path / "test.db" + dm = DatabaseManager() + before = datetime.now() + dm.initialize(db_path=str(db_file)) + after = datetime.now() + + loaded_at = dm.loaded_at + assert loaded_at is not None + assert before <= loaded_at <= after + dm.close() + + def test_memory_database_not_persistent(self) -> None: + """In-memory database is 
not persistent.""" + dm = DatabaseManager() + dm.initialize() + assert dm.is_persistent is False + assert dm.db_path == ":memory:" + dm.close() + + +class TestDatabaseManagerWalMode: + """Tests for WAL mode in file-based databases.""" + + def test_file_database_uses_wal_mode(self, tmp_path: Path) -> None: + """File-based database uses WAL mode for concurrent access.""" + db_file = tmp_path / "wal_test.db" + dm = DatabaseManager() + dm.initialize(db_path=str(db_file)) + + with dm.cursor() as cur: + cur.execute("PRAGMA journal_mode") + mode = cur.fetchone()[0] + assert mode.lower() == "wal" + dm.close() + + def test_memory_database_does_not_use_wal(self) -> None: + """In-memory database doesn't use WAL mode (not applicable).""" + dm = DatabaseManager() + dm.initialize() + + with dm.cursor() as cur: + cur.execute("PRAGMA journal_mode") + mode = cur.fetchone()[0] + # Memory databases use 'memory' journal mode + assert mode.lower() == "memory" + dm.close() + + +class TestAtomicWrite: + """Tests for atomic write pattern.""" + + def test_atomic_write_creates_final_file(self, tmp_path: Path) -> None: + """Database is created at final path after initialization.""" + target = tmp_path / "atomic_test.db" + dm = DatabaseManager() + dm.initialize(db_path=str(target)) + assert target.exists() + assert not Path(str(target) + ".tmp").exists() + dm.close() + + def test_atomic_write_no_temp_file_remains(self, tmp_path: Path) -> None: + """No .tmp file remains after successful initialization.""" + target = tmp_path / "atomic_clean.db" + dm = DatabaseManager() + dm.initialize(db_path=str(target)) + dm.close() + + # Check no temp files remain + temp_files = list(tmp_path.glob("*.tmp")) + assert len(temp_files) == 0 + + +class TestSchemaIntrospection: + """Tests for database schema introspection.""" + + def test_get_schema_info_returns_all_tables(self) -> None: + """Schema info includes all database tables.""" + dm = DatabaseManager() + dm.initialize() + schema = dm.get_schema_info() + 
+ assert schema["version"] == "0.2.0" + table_names = [t["name"] for t in schema["tables"]] + assert "projects" in table_names + assert "activities" in table_names + assert "relationships" in table_names + assert "wbs" in table_names + assert "calendars" in table_names + dm.close() + + def test_get_schema_info_includes_column_details(self) -> None: + """Schema info includes column names, types, and nullable.""" + dm = DatabaseManager() + dm.initialize() + schema = dm.get_schema_info() + + activities_table = next(t for t in schema["tables"] if t["name"] == "activities") + column_names = [c["name"] for c in activities_table["columns"]] + assert "task_id" in column_names + assert "task_name" in column_names + + # Check column details + task_id_col = next(c for c in activities_table["columns"] if c["name"] == "task_id") + assert task_id_col["type"] == "TEXT" + # Note: SQLite reports PRIMARY KEY TEXT columns as nullable + # but the PRIMARY KEY constraint still applies + assert "nullable" in task_id_col + + # Check a NOT NULL column + task_name_col = next(c for c in activities_table["columns"] if c["name"] == "task_name") + assert task_name_col["nullable"] is False + dm.close() + + def test_get_schema_info_includes_row_counts(self) -> None: + """Schema info includes row counts for each table.""" + dm = DatabaseManager() + dm.initialize() + schema = dm.get_schema_info() + + for table in schema["tables"]: + assert "row_count" in table + assert isinstance(table["row_count"], int) + assert table["row_count"] >= 0 + dm.close() + + def test_schema_info_includes_primary_keys(self) -> None: + """Schema info includes primary key for each table.""" + dm = DatabaseManager() + dm.initialize() + schema = dm.get_schema_info() + + activities_table = next(t for t in schema["tables"] if t["name"] == "activities") + assert "primary_key" in activities_table + assert "task_id" in activities_table["primary_key"] + dm.close() + + def test_schema_info_includes_foreign_keys(self) -> None: + 
"""Schema info includes foreign key relationships.""" + dm = DatabaseManager() + dm.initialize() + schema = dm.get_schema_info() + + activities_table = next(t for t in schema["tables"] if t["name"] == "activities") + assert "foreign_keys" in activities_table + + # activities.proj_id -> projects.proj_id + fk = next( + (fk for fk in activities_table["foreign_keys"] if fk["column"] == "proj_id"), + None, + ) + assert fk is not None + assert fk["references_table"] == "projects" + assert fk["references_column"] == "proj_id" + dm.close() diff --git a/uv.lock b/uv.lock index 9b141f9..674531a 100644 --- a/uv.lock +++ b/uv.lock @@ -578,7 +578,7 @@ wheels = [ [[package]] name = "xer-mcp" -version = "0.1.0" +version = "0.2.0" source = { editable = "." } dependencies = [ { name = "mcp" },