7 min read

Get Document

Retrieve a processed document and its extracted data by ID

Retrieve a processed document by its unique identifier. Returns the document metadata, processing status, and extracted data.

GET /v1/documents/:id

Overview

Use this endpoint to:

  • Retrieve extracted data from a processed document
  • Check the status of an async processing job
  • Access document metadata and confidence scores
  • Download the original file reference

Request

Headers

| Parameter | Type | Description |
|-----------|------|-------------|
| X-API-Key (required) | string | Your DocuRift API key (format: frc_xxxxx) |

Path Parameters

| Parameter | Type | Description |
|-----------|------|-------------|
| id (required) | string | Document ID (format: doc_xxxxx) |

Query Parameters

| Parameter | Type | Description |
|-----------|------|-------------|
| includeRawText | boolean | Include raw OCR text in response. Default: false |
| includePageImages | boolean | Include signed URLs for page thumbnail images. Default: false |

Code Examples

cURL

curl
# Fetch a single document by ID; the API key travels in the X-API-Key header
curl --request GET \
  --header "X-API-Key: frc_your_api_key_here" \
  "https://api.docurift.com/v1/documents/doc_abc123xyz456"

cURL (with options)

curl_options
# Same request, additionally asking for raw OCR text and page thumbnail URLs
curl --request GET \
  --header "X-API-Key: frc_your_api_key_here" \
  "https://api.docurift.com/v1/documents/doc_abc123xyz456?includeRawText=true&includePageImages=true"

Python

get_document.py
import requests
import os

# Base URL shared by the v1 endpoints
API_URL = 'https://api.docurift.com/v1'
# API key is read from the DOCURIFT_API_KEY environment variable (None when unset)
API_KEY = os.environ.get('DOCURIFT_API_KEY')

def get_document(document_id, include_raw_text=False, include_page_images=False,
                 *, timeout=30):
    """Retrieve a document by ID.

    Args:
        document_id: Document ID (format: doc_xxxxx).
        include_raw_text: When true, send ``includeRawText=true`` so the
            response contains the raw OCR text.
        include_page_images: When true, send ``includePageImages=true`` so
            the response contains signed URLs for page thumbnails.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without it, a stalled connection
            would block the caller indefinitely.

    Returns:
        The parsed JSON response body as a dict.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    headers = {
        'X-API-Key': API_KEY,
    }

    # Only send the optional flags when they are enabled; the API treats
    # an absent flag as false.
    params = {}
    if include_raw_text:
        params['includeRawText'] = 'true'
    if include_page_images:
        params['includePageImages'] = 'true'

    response = requests.get(
        f'{API_URL}/documents/{document_id}',
        headers=headers,
        params=params,
        timeout=timeout,
    )

    # Turn 4xx/5xx answers into exceptions instead of returning error bodies
    response.raise_for_status()
    return response.json()

# Example usage: fetch one document and print its summary fields
result = get_document('doc_abc123xyz456')

# The document itself is nested under the top-level 'data' key of the response
document = result['data']
print(f"Document ID: {document['id']}")
print(f"Status: {document['status']}")
print(f"Document Type: {document['documentType']}")
print(f"Confidence: {document['confidence']}")

# Access extracted data (only populated once the status is 'completed')
if document['status'] == 'completed':
  extracted = document['extractedData']
  # .get() yields None instead of raising when a field was not extracted
  print(f"Invoice Number: {extracted.get('invoiceNumber')}")
  print(f"Total Amount: {extracted.get('totalAmount')}")

JavaScript

getDocument.js
// API key is read from the DOCURIFT_API_KEY environment variable (undefined when unset)
const { DOCURIFT_API_KEY: API_KEY } = process.env;
// Base URL shared by the v1 endpoints
const API_URL = 'https://api.docurift.com/v1';

/**
 * Retrieve a document by ID.
 *
 * @param {string} documentId - Document ID (format: doc_xxxxx).
 * @param {{includeRawText?: boolean, includePageImages?: boolean}} [options]
 *   Optional flags; each one is only sent as a query parameter when truthy.
 * @returns {Promise<object>} The parsed JSON response body.
 * @throws {Error} When the API answers with a non-2xx status. The message is
 *   the API's `error.message` when available, otherwise the HTTP status.
 */
async function getDocument(documentId, options = {}) {
  const params = new URLSearchParams();

  if (options.includeRawText) {
    params.append('includeRawText', 'true');
  }
  if (options.includePageImages) {
    params.append('includePageImages', 'true');
  }

  const query = params.toString();
  // Encode the ID so unexpected characters cannot alter the request path
  const url = `${API_URL}/documents/${encodeURIComponent(documentId)}${query ? '?' + query : ''}`;

  const response = await fetch(url, {
    headers: {
      'X-API-Key': API_KEY
    }
  });

  if (!response.ok) {
    // Fall back to the HTTP status when the error body is not the expected
    // JSON shape (e.g. an HTML page from a proxy or gateway).
    let message = `Request failed with status ${response.status}`;
    try {
      const body = await response.json();
      message = body?.error?.message ?? message;
    } catch {
      // Non-JSON error body: keep the status-based message
    }
    throw new Error(message);
  }

  return response.json();
}

// Example usage
const result = await getDocument('doc_abc123xyz456');
const document = result.data;

// Print the summary fields, one label/value pair per line
for (const [label, value] of [
  ['Document ID:', document.id],
  ['Status:', document.status],
  ['Confidence:', document.confidence],
]) {
  console.log(label, value);
}

// Extracted data is only shown once processing has completed
if (document.status === 'completed') console.log('Extracted Data:', document.extractedData);

Response

Success Response (200 OK) - Completed Document

response_completed.json
{
"success": true,
"data": {
  "id": "doc_abc123xyz456",
  "organizationId": "org_xyz789",
  "fileName": "invoice.pdf",
  "fileType": "application/pdf",
  "fileSize": 245678,
  "documentType": "invoice",
  "status": "completed",
  "pagesProcessed": 2,
  "confidence": 0.96,
  "extractedData": {
    "invoiceNumber": "INV-2024-00123",
    "invoiceDate": "2024-01-15",
    "dueDate": "2024-02-15",
    "currency": "USD",
    "vendor": {
      "name": "Acme Shipping Co.",
      "address": "123 Harbor Blvd, Los Angeles, CA 90021",
      "taxId": "12-3456789",
      "email": "billing@acmeshipping.com"
    },
    "customer": {
      "name": "Global Imports Inc.",
      "address": "456 Trade St, New York, NY 10001"
    },
    "lineItems": [
      {
        "description": "Ocean Freight - Container 20ft",
        "quantity": 2,
        "unitPrice": 1500.00,
        "total": 3000.00
      }
    ],
    "subtotal": 3000.00,
    "taxAmount": 240.00,
    "totalAmount": 3240.00
  },
  "metadata": {
    "processingTimeMs": 2340,
    "modelVersion": "v2.1.0",
    "pageConfidences": [0.97, 0.95],
    "extractionDetails": {
      "tablesExtracted": 1,
      "fieldsExtracted": 18
    }
  },
  "createdAt": "2024-01-26T10:30:00Z",
  "processedAt": "2024-01-26T10:30:02Z"
}
}

Success Response - Processing Document

response_processing.json
{
"success": true,
"data": {
  "id": "doc_abc123xyz456",
  "organizationId": "org_xyz789",
  "fileName": "large-document.pdf",
  "fileType": "application/pdf",
  "fileSize": 5234567,
  "documentType": "invoice",
  "status": "processing",
  "pagesProcessed": 0,
  "confidence": null,
  "extractedData": null,
  "estimatedCompletionTime": "2024-01-26T10:35:00Z",
  "progress": {
    "currentPage": 8,
    "totalPages": 25,
    "percentComplete": 32
  },
  "createdAt": "2024-01-26T10:30:00Z",
  "processedAt": null
}
}

Success Response - Failed Document

response_failed.json
{
"success": true,
"data": {
  "id": "doc_abc123xyz456",
  "organizationId": "org_xyz789",
  "fileName": "corrupted.pdf",
  "fileType": "application/pdf",
  "fileSize": 12345,
  "documentType": "invoice",
  "status": "failed",
  "pagesProcessed": 0,
  "confidence": null,
  "extractedData": null,
  "error": {
    "code": "PROCESSING_FAILED",
    "message": "Unable to parse PDF: file appears to be corrupted",
    "details": "PDF header is invalid or missing"
  },
  "createdAt": "2024-01-26T10:30:00Z",
  "processedAt": "2024-01-26T10:30:05Z"
}
}

Response with Raw Text

response_with_raw_text.json
{
"success": true,
"data": {
  "id": "doc_abc123xyz456",
  "status": "completed",
  "extractedData": { ... },
  "rawText": {
    "pages": [
      {
        "pageNumber": 1,
        "text": "COMMERCIAL INVOICE\n\nInvoice No: INV-2024-00123\nDate: January 15, 2024\n\nBill To:\nGlobal Imports Inc.\n456 Trade St\nNew York, NY 10001\n..."
      },
      {
        "pageNumber": 2,
        "text": "Page 2 content..."
      }
    ]
  }
}
}

Response Fields

| Field | Type | Description |
|-------|------|-------------|
| id | string | Unique document identifier |
| organizationId | string | Organization that owns this document |
| fileName | string | Original uploaded file name |
| fileType | string | MIME type of the file |
| fileSize | number | File size in bytes |
| documentType | string | Document type used for processing |
| status | string | Processing status: queued, processing, completed, failed |
| pagesProcessed | number | Number of pages processed |
| confidence | number | Overall extraction confidence (0-1), null if not completed |
| extractedData | object | Structured extracted data, null if not completed |
| error | object | Error details if status is failed |
| progress | object | Processing progress for async jobs |
| metadata | object | Processing metadata |
| rawText | object | Raw OCR text (if includeRawText=true) |
| createdAt | string | ISO 8601 timestamp of upload |
| processedAt | string | ISO 8601 timestamp of completion |

Document Status Values

| Status | Description |
|--------|-------------|
| queued | Document is waiting in the processing queue |
| processing | Document is currently being processed |
| completed | Processing finished successfully |
| failed | Processing failed (see error field) |

Error Responses

401 Unauthorized

error_401.json
{
"success": false,
"error": {
  "code": "INVALID_API_KEY",
  "message": "Invalid API key"
}
}

403 Forbidden

error_403.json
{
"success": false,
"error": {
  "code": "FORBIDDEN",
  "message": "You do not have permission to access this document"
}
}

404 Not Found

error_404.json
{
"success": false,
"error": {
  "code": "DOCUMENT_NOT_FOUND",
  "message": "Document with ID 'doc_abc123xyz456' not found"
}
}

Error Codes Reference

| Code | HTTP Status | Description | Solution |
|------|-------------|-------------|----------|
| INVALID_API_KEY | 401 | API key invalid or expired | Verify API key |
| FORBIDDEN | 403 | No access to document | Check organization access |
| DOCUMENT_NOT_FOUND | 404 | Document does not exist | Verify document ID |

Best Practices

Polling for Async Documents

When waiting for async document processing, implement exponential backoff:

poll_with_backoff.py
import time

def wait_for_completion(document_id, max_wait=300):
    """Wait for document processing with exponential backoff.

    Polls ``get_document`` until the document reaches a terminal status.
    The delay between polls starts at 2 seconds and grows by a factor of
    1.5 after every attempt, capped at 30 seconds.

    Args:
        document_id: ID of the document to poll.
        max_wait: Maximum total time to wait, in seconds. Measured on a
            monotonic clock, so time spent inside each request counts
            toward the limit as well as time spent sleeping.

    Returns:
        The ``data`` object of the response once its status is
        ``completed``.

    Raises:
        RuntimeError: If processing failed; carries the API's error message.
        TimeoutError: If the document has not finished within ``max_wait``
            seconds. The document is always polled at least once, and one
            last time at the deadline, before this is raised.
    """
    interval = 2  # Start with 2 seconds
    max_interval = 30  # Max 30 seconds between polls
    deadline = time.monotonic() + max_wait

    while True:
        data = get_document(document_id)['data']
        status = data['status']

        if status == 'completed':
            return data
        if status == 'failed':
            raise RuntimeError(data['error']['message'])

        # Show progress for long documents; the field may be absent or null
        progress = data.get('progress')
        if progress:
            print(f"Processing: {progress['percentComplete']}% complete")

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            raise TimeoutError("Document processing timed out")

        # Never sleep past the deadline, so the final poll lands on it
        time.sleep(min(interval, remaining))
        interval = min(interval * 1.5, max_interval)

Handling Different Statuses

handle_status.js
/**
 * Fetch a document and hand it to the routine that matches its status.
 *
 * @param {string} documentId - ID of the document to fetch.
 * @returns {Promise<*>} Whatever the selected handler returns.
 * @throws {Error} When the document reports a status this function does not know.
 */
async function handleDocument(documentId) {
  const { data: doc } = await getDocument(documentId);
  const { status } = doc;

  // Finished: pass the extracted data on for processing
  if (status === 'completed') {
    return processExtractedData(doc.extractedData);
  }

  // Still in flight: report progress and try again in 5 seconds
  if (status === 'processing' || status === 'queued') {
    console.log(`Document still processing: ${doc.progress?.percentComplete || 0}%`);
    return scheduleRetry(documentId, 5000);
  }

  // Failed: log the reason and delegate to the error handler
  if (status === 'failed') {
    console.error(`Processing failed: ${doc.error.message}`);
    return handleProcessingError(doc.error);
  }

  throw new Error(`Unknown status: ${doc.status}`);
}
💡

Caching

Document results are immutable once completed. You can safely cache completed document responses on your end to reduce API calls.