Get Document
Retrieve a processed document and its extracted data by ID
Retrieve a processed document by its unique identifier. Returns the document metadata, processing status, and extracted data.
GET /v1/documents/:id
Overview
Use this endpoint to:
- Retrieve extracted data from a processed document
- Check the status of an async processing job
- Access document metadata and confidence scores
- Download the original file reference
Request
Headers
| Parameter | Type | Description |
|---|---|---|
| X-API-Key (required) | string | Your DocuRift API key (format: frc_xxxxx) |
Path Parameters
| Parameter | Type | Description |
|---|---|---|
| id (required) | string | Document ID (format: doc_xxxxx) |
Query Parameters
| Parameter | Type | Description |
|---|---|---|
| includeRawText | boolean | Include raw OCR text in the response. Default: false |
| includePageImages | boolean | Include signed URLs for page thumbnail images. Default: false |
Code Examples
cURL
curl -X GET "https://api.docurift.com/v1/documents/doc_abc123xyz456" \
-H "X-API-Key: frc_your_api_key_here"
cURL (with options)
curl -X GET "https://api.docurift.com/v1/documents/doc_abc123xyz456?includeRawText=true&includePageImages=true" \
-H "X-API-Key: frc_your_api_key_here"
Python
import requests
import os
API_KEY = os.getenv('DOCURIFT_API_KEY')
API_URL = 'https://api.docurift.com/v1'
def get_document(document_id, include_raw_text=False, include_page_images=False,
                 timeout=30):
    """Retrieve a document by ID.

    Args:
        document_id: Document identifier (format: ``doc_xxxxx``).
        include_raw_text: If true, send ``includeRawText=true`` so the
            response includes the raw OCR text.
        include_page_images: If true, send ``includePageImages=true`` so the
            response includes signed URLs for page thumbnail images.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without it a stalled connection
            would block the caller indefinitely.

    Returns:
        The parsed JSON response body.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # Local import keeps this snippet copy-pasteable on its own.
    from urllib.parse import quote

    headers = {'X-API-Key': API_KEY}

    # Only send the optional flags that were actually requested; the API
    # defaults both to false.
    params = {}
    if include_raw_text:
        params['includeRawText'] = 'true'
    if include_page_images:
        params['includePageImages'] = 'true'

    # Percent-encode the ID so characters such as "/" or "?" cannot alter
    # the request path.
    encoded_id = quote(str(document_id), safe='')

    response = requests.get(
        f'{API_URL}/documents/{encoded_id}',
        headers=headers,
        params=params,
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()
# Example usage: fetch one document and print its summary fields.
result = get_document('doc_abc123xyz456')
document = result['data']
for label, key in (
    ('Document ID', 'id'),
    ('Status', 'status'),
    ('Document Type', 'documentType'),
    ('Confidence', 'confidence'),
):
    print(f"{label}: {document[key]}")

# Extracted data is only read once the document has finished processing.
if document['status'] == 'completed':
    extracted = document['extractedData']
    for label, key in (
        ('Invoice Number', 'invoiceNumber'),
        ('Total Amount', 'totalAmount'),
    ):
        print(f"{label}: {extracted.get(key)}")

JavaScript
const API_KEY = process.env.DOCURIFT_API_KEY;
const API_URL = 'https://api.docurift.com/v1';
/**
 * Retrieve a document by ID.
 *
 * @param {string} documentId - Document identifier (format: doc_xxxxx).
 * @param {{includeRawText?: boolean, includePageImages?: boolean}} [options]
 *   Optional flags; each one set to a truthy value is sent as "true".
 * @returns {Promise<object>} The parsed JSON response body.
 * @throws {Error} When the response status is not OK. The message is the
 *   API's error message when the body provides one, otherwise the HTTP status.
 */
async function getDocument(documentId, options = {}) {
  const params = new URLSearchParams();
  if (options.includeRawText) {
    params.append('includeRawText', 'true');
  }
  if (options.includePageImages) {
    params.append('includePageImages', 'true');
  }

  // Encode the ID so characters such as "/" or "?" cannot alter the path.
  const query = params.toString();
  const url = `${API_URL}/documents/${encodeURIComponent(documentId)}${query ? '?' + query : ''}`;

  const response = await fetch(url, {
    headers: {
      'X-API-Key': API_KEY
    }
  });

  if (!response.ok) {
    // Fall back to the HTTP status when the error body is not JSON or does
    // not have the { error: { message } } shape, so the real failure is
    // not hidden behind a parse error or a TypeError.
    let message = `Request failed with status ${response.status}`;
    try {
      const body = await response.json();
      message = body?.error?.message ?? message;
    } catch {
      // Non-JSON error body: keep the status-based message.
    }
    throw new Error(message);
  }

  return response.json();
}
// Example usage
const result = await getDocument('doc_abc123xyz456');
// Named `doc` rather than `document` to avoid shadowing the DOM global.
const doc = result.data;
console.log('Document ID:', doc.id);
console.log('Status:', doc.status);
console.log('Confidence:', doc.confidence);

// Extracted data is only logged once processing has completed.
if (doc.status === 'completed') {
  console.log('Extracted Data:', doc.extractedData);
}

Response
Success Response (200 OK) - Completed Document
{
"success": true,
"data": {
"id": "doc_abc123xyz456",
"organizationId": "org_xyz789",
"fileName": "invoice.pdf",
"fileType": "application/pdf",
"fileSize": 245678,
"documentType": "invoice",
"status": "completed",
"pagesProcessed": 2,
"confidence": 0.96,
"extractedData": {
"invoiceNumber": "INV-2024-00123",
"invoiceDate": "2024-01-15",
"dueDate": "2024-02-15",
"currency": "USD",
"vendor": {
"name": "Acme Shipping Co.",
"address": "123 Harbor Blvd, Los Angeles, CA 90021",
"taxId": "12-3456789",
"email": "billing@acmeshipping.com"
},
"customer": {
"name": "Global Imports Inc.",
"address": "456 Trade St, New York, NY 10001"
},
"lineItems": [
{
"description": "Ocean Freight - Container 20ft",
"quantity": 2,
"unitPrice": 1500.00,
"total": 3000.00
}
],
"subtotal": 3000.00,
"taxAmount": 240.00,
"totalAmount": 3240.00
},
"metadata": {
"processingTimeMs": 2340,
"modelVersion": "v2.1.0",
"pageConfidences": [0.97, 0.95],
"extractionDetails": {
"tablesExtracted": 1,
"fieldsExtracted": 18
}
},
"createdAt": "2024-01-26T10:30:00Z",
"processedAt": "2024-01-26T10:30:02Z"
}
}
Success Response - Processing Document
{
"success": true,
"data": {
"id": "doc_abc123xyz456",
"organizationId": "org_xyz789",
"fileName": "large-document.pdf",
"fileType": "application/pdf",
"fileSize": 5234567,
"documentType": "invoice",
"status": "processing",
"pagesProcessed": 0,
"confidence": null,
"extractedData": null,
"estimatedCompletionTime": "2024-01-26T10:35:00Z",
"progress": {
"currentPage": 8,
"totalPages": 25,
"percentComplete": 32
},
"createdAt": "2024-01-26T10:30:00Z",
"processedAt": null
}
}
Success Response - Failed Document
{
"success": true,
"data": {
"id": "doc_abc123xyz456",
"organizationId": "org_xyz789",
"fileName": "corrupted.pdf",
"fileType": "application/pdf",
"fileSize": 12345,
"documentType": "invoice",
"status": "failed",
"pagesProcessed": 0,
"confidence": null,
"extractedData": null,
"error": {
"code": "PROCESSING_FAILED",
"message": "Unable to parse PDF: file appears to be corrupted",
"details": "PDF header is invalid or missing"
},
"createdAt": "2024-01-26T10:30:00Z",
"processedAt": "2024-01-26T10:30:05Z"
}
}
Response with Raw Text
{
"success": true,
"data": {
"id": "doc_abc123xyz456",
"status": "completed",
"extractedData": { ... },
"rawText": {
"pages": [
{
"pageNumber": 1,
"text": "COMMERCIAL INVOICE\n\nInvoice No: INV-2024-00123\nDate: January 15, 2024\n\nBill To:\nGlobal Imports Inc.\n456 Trade St\nNew York, NY 10001\n..."
},
{
"pageNumber": 2,
"text": "Page 2 content..."
}
]
}
}
}
Response Fields
| Field | Type | Description |
|---|---|---|
| id | string | Unique document identifier |
| organizationId | string | Organization that owns this document |
| fileName | string | Original uploaded file name |
| fileType | string | MIME type of the file |
| fileSize | number | File size in bytes |
| documentType | string | Document type used for processing |
| status | string | Processing status: queued, processing, completed, failed |
| pagesProcessed | number | Number of pages processed |
| confidence | number | Overall extraction confidence (0-1), null if not completed |
| extractedData | object | Structured extracted data, null if not completed |
| error | object | Error details if status is failed |
| progress | object | Processing progress for async jobs |
| estimatedCompletionTime | string | ISO 8601 timestamp of the estimated completion (shown in the processing example) |
| metadata | object | Processing metadata |
| rawText | object | Raw OCR text (if includeRawText=true) |
| createdAt | string | ISO 8601 timestamp of upload |
| processedAt | string | ISO 8601 timestamp of completion, null while processing |
Document Status Values
| Status | Description |
|--------|-------------|
| queued | Document is waiting in the processing queue |
| processing | Document is currently being processed |
| completed | Processing finished successfully |
| failed | Processing failed (see error field) |
Error Responses
401 Unauthorized
{
"success": false,
"error": {
"code": "INVALID_API_KEY",
"message": "Invalid API key"
}
}
403 Forbidden
{
"success": false,
"error": {
"code": "FORBIDDEN",
"message": "You do not have permission to access this document"
}
}
404 Not Found
{
"success": false,
"error": {
"code": "DOCUMENT_NOT_FOUND",
"message": "Document with ID 'doc_abc123xyz456' not found"
}
}
Error Codes Reference
| Code | HTTP Status | Description | Solution |
|------|-------------|-------------|----------|
| INVALID_API_KEY | 401 | API key invalid or expired | Verify API key |
| FORBIDDEN | 403 | No access to document | Check organization access |
| DOCUMENT_NOT_FOUND | 404 | Document does not exist | Verify document ID |
Best Practices
Polling for Async Documents
When waiting for async document processing, implement exponential backoff:
import time
def wait_for_completion(document_id, max_wait=300):
"""Wait for document processing with exponential backoff."""
interval = 2 # Start with 2 seconds
max_interval = 30 # Max 30 seconds between polls
elapsed = 0
while elapsed < max_wait:
result = get_document(document_id)
status = result['data']['status']
if status == 'completed':
return result['data']
elif status == 'failed':
raise Exception(result['data']['error']['message'])
# Show progress for long documents
if 'progress' in result['data']:
progress = result['data']['progress']
print(f"Processing: {progress['percentComplete']}% complete")
time.sleep(interval)
elapsed += interval
interval = min(interval * 1.5, max_interval)
raise TimeoutError("Document processing timed out")Handling Different Statuses
/**
 * Fetch a document and dispatch on its processing status.
 *
 * - completed: hands the extracted data to processExtractedData
 * - processing / queued: logs progress and schedules a retry in 5000 ms
 * - failed: logs the error and hands it to handleProcessingError
 *
 * @param {string} documentId - ID of the document to check.
 * @returns {Promise<*>} Whatever the selected handler returns.
 * @throws {Error} If the status is none of the values above.
 */
async function handleDocument(documentId) {
  const { data: doc } = await getDocument(documentId);
  const status = doc.status;

  // Finished: process the extracted data.
  if (status === 'completed') {
    return processExtractedData(doc.extractedData);
  }

  // Still in flight: report progress and try again later.
  if (status === 'processing' || status === 'queued') {
    console.log(`Document still processing: ${doc.progress?.percentComplete || 0}%`);
    return scheduleRetry(documentId, 5000);
  }

  // Failed: surface the error to the caller's handler.
  if (status === 'failed') {
    console.error(`Processing failed: ${doc.error.message}`);
    return handleProcessingError(doc.error);
  }

  throw new Error(`Unknown status: ${doc.status}`);
}

Caching
Document results are immutable once completed. You can safely cache completed document responses on your end to reduce API calls.
Related Endpoints
- Process Document (Sync) - Upload and process documents
- Process Document (Async) - Async processing
- List Documents - List all documents
- Delete Document - Delete a document