Initial commit: tabular-extract skill
Claude Code skill that extracts structured data from document collections into tabular format using Claude's native document understanding capabilities.
This commit is contained in:
122
scripts/test_convert_docx.py
Normal file
122
scripts/test_convert_docx.py
Normal file
@@ -0,0 +1,122 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Tests for convert_docx.py"""
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import os
|
||||
|
||||
# Path to the script under test; it is expected to sit next to this test file.
SCRIPT = os.path.join(os.path.split(__file__)[0], "convert_docx.py")
|
||||
|
||||
|
||||
def test_missing_argument():
    """Running the script with no arguments exits 1 and prints usage on stderr."""
    command = [sys.executable, SCRIPT]
    completed = subprocess.run(command, capture_output=True, text=True)

    exit_code = completed.returncode
    assert exit_code == 1
    assert "Usage:" in completed.stderr
|
||||
|
||||
|
||||
def test_nonexistent_file():
    """Script should error on a file that doesn't exist.

    The missing path is built inside a freshly created temporary directory,
    so it is guaranteed to be absent on every platform rather than relying
    on a hard-coded ``/tmp`` name that may not exist (or may be occupied).
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Nothing has been created inside tmp_dir, so this path cannot exist.
        missing_path = os.path.join(tmp_dir, "nonexistent_file_abc123.docx")
        result = subprocess.run(
            [sys.executable, SCRIPT, missing_path],
            capture_output=True, text=True
        )

    assert result.returncode == 1
    assert "Error" in result.stderr or "not found" in result.stderr.lower()
|
||||
|
||||
|
||||
def test_non_docx_file():
    """Script should error on a non-DOCX file.

    A plain-text file is written into a temporary directory and passed to
    the script. The directory context manager removes the file even when
    the subprocess call raises, so no stray temp files are left behind.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        txt_path = os.path.join(tmp_dir, "not_a_docx.txt")
        # Close the file before launching the script so the child process
        # never reads a file that the parent still holds open.
        with open(txt_path, "wb") as handle:
            handle.write(b"hello world")

        result = subprocess.run(
            [sys.executable, SCRIPT, txt_path],
            capture_output=True, text=True
        )

    assert result.returncode == 1
|
||||
|
||||
|
||||
def test_valid_docx():
    """Script should extract text from a valid DOCX file.

    Builds a two-paragraph document with python-docx, runs the script on it,
    and checks that both paragraphs appear on stdout. When python-docx is not
    installed the test prints a SKIP notice and returns without asserting.
    """
    try:
        from docx import Document
    except ImportError:
        print("SKIP: python-docx not installed")
        return

    doc = Document()
    doc.add_paragraph("Hello from test document")
    doc.add_paragraph("Second paragraph here")

    # A temporary directory guarantees cleanup even if saving or running the
    # script raises, and avoids saving over a path whose handle is still open.
    with tempfile.TemporaryDirectory() as tmp_dir:
        docx_path = os.path.join(tmp_dir, "sample.docx")
        doc.save(docx_path)
        result = subprocess.run(
            [sys.executable, SCRIPT, docx_path],
            capture_output=True, text=True
        )

    assert result.returncode == 0
    assert "Hello from test document" in result.stdout
    assert "Second paragraph here" in result.stdout
|
||||
|
||||
|
||||
def test_docx_with_table():
    """Script should extract table content from a DOCX file.

    Builds a document holding a 2x2 table between two paragraphs, runs the
    script on it, and checks that first-column cell text from both rows
    appears on stdout. When python-docx is not installed the test prints a
    SKIP notice and returns without asserting.
    """
    try:
        from docx import Document
    except ImportError:
        print("SKIP: python-docx not installed")
        return

    doc = Document()
    doc.add_paragraph("Before table")
    table = doc.add_table(rows=2, cols=2)
    table.cell(0, 0).text = "Header1"
    table.cell(0, 1).text = "Header2"
    table.cell(1, 0).text = "Value1"
    table.cell(1, 1).text = "Value2"
    doc.add_paragraph("After table")

    # A temporary directory guarantees cleanup even if saving or running the
    # script raises, and avoids saving over a path whose handle is still open.
    with tempfile.TemporaryDirectory() as tmp_dir:
        docx_path = os.path.join(tmp_dir, "table.docx")
        doc.save(docx_path)
        result = subprocess.run(
            [sys.executable, SCRIPT, docx_path],
            capture_output=True, text=True
        )

    assert result.returncode == 0
    assert "Header1" in result.stdout
    assert "Value1" in result.stdout
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Minimal stand-alone runner so the suite can be executed without pytest.
    suite = (
        test_missing_argument,
        test_nonexistent_file,
        test_non_docx_file,
        test_valid_docx,
        test_docx_with_table,
    )
    ok_count, bad_count = 0, 0

    for case in suite:
        label = case.__name__
        try:
            case()
            print(f" PASS: {label}")
            ok_count += 1
        except AssertionError as exc:
            # An assertion inside the test did not hold.
            print(f" FAIL: {label} - {exc}")
            bad_count += 1
        except Exception as exc:
            # Anything else raised while the test was running.
            print(f" ERROR: {label} - {exc}")
            bad_count += 1

    print(f"\n{ok_count} passed, {bad_count} failed")
    # Non-zero exit status signals failure to the calling shell / CI.
    sys.exit(int(bad_count > 0))
|
||||
Reference in New Issue
Block a user