7 min read

Get Document

Retrieve a processed document and its extracted data by ID

Retrieve a processed document by its unique identifier. Returns the document metadata, processing status, and extracted data.

GET /v1/documents/:id

Overview

Use this endpoint to:

Retrieve extracted data from a processed document
Check the status of an async processing job
Access document metadata and confidence scores
Download the original file reference

Request

Headers

Parameter	Type	Description
`X-API-Key` (required)	`string`	Your DocuRift API key (format: frc_xxxxx)

Path Parameters

Parameter	Type	Description
`id` (required)	`string`	Document ID (format: doc_xxxxx)

Query Parameters

Parameter	Type	Description
`includeRawText`	`boolean`	Include raw OCR text in the response. Default: `false`
`includePageImages`	`boolean`	Include signed URLs for page thumbnail images. Default: `false`

Code Examples

cURL

curl

# Fetch a single document by ID; the API key is sent in the X-API-Key header.
curl -X GET "https://api.docurift.com/v1/documents/doc_abc123xyz456" \
-H "X-API-Key: frc_your_api_key_here"

cURL (with options)

curl_options

# Same request with both optional query flags enabled:
# includeRawText adds the raw OCR text, includePageImages adds signed page thumbnail URLs.
curl -X GET "https://api.docurift.com/v1/documents/doc_abc123xyz456?includeRawText=true&includePageImages=true" \
-H "X-API-Key: frc_your_api_key_here"

Python

get_document.py

import requests
import os

# API key read from the DOCURIFT_API_KEY environment variable (None if it is unset).
API_KEY = os.getenv('DOCURIFT_API_KEY')
# Base URL of the v1 API; endpoint paths such as /documents/<id> are appended to it.
API_URL = 'https://api.docurift.com/v1'

def get_document(document_id, include_raw_text=False, include_page_images=False,
                 timeout=30):
    """Retrieve a document by ID.

    Args:
        document_id: Document ID (format: doc_xxxxx).
        include_raw_text: When true, ask the API to include the raw OCR text.
        include_page_images: When true, ask the API to include signed URLs
            for page thumbnail images.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. ``None`` waits indefinitely.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # Local import keeps this snippet self-contained.
    from urllib.parse import quote

    headers = {'X-API-Key': API_KEY}

    # Only send the flags that are switched on; the API defaults both to false.
    params = {}
    if include_raw_text:
        params['includeRawText'] = 'true'
    if include_page_images:
        params['includePageImages'] = 'true'

    # Percent-encode the ID so characters such as "/" or "?" cannot alter
    # the request path.
    safe_id = quote(str(document_id), safe='')

    # Without a timeout, requests would block forever on a stalled connection.
    response = requests.get(
        f'{API_URL}/documents/{safe_id}',
        headers=headers,
        params=params,
        timeout=timeout,
    )

    response.raise_for_status()
    return response.json()

# Example usage
# NOTE: this performs a live API request as soon as the script runs.
result = get_document('doc_abc123xyz456')

# The document itself is nested under the top-level "data" key of the response.
document = result['data']
print(f"Document ID: {document['id']}")
print(f"Status: {document['status']}")
print(f"Document Type: {document['documentType']}")
# confidence is null (None) for documents that have not completed.
print(f"Confidence: {document['confidence']}")

# Access extracted data
# extractedData is null unless the status is 'completed', so check the status first.
if document['status'] == 'completed':
  extracted = document['extractedData']
  # These keys match the invoice example response; other document types
  # presumably expose different fields, hence .get() rather than indexing.
  print(f"Invoice Number: {extracted.get('invoiceNumber')}")
  print(f"Total Amount: {extracted.get('totalAmount')}")

JavaScript

getDocument.js

// API key read from the DOCURIFT_API_KEY environment variable (undefined if it is unset).
const API_KEY = process.env.DOCURIFT_API_KEY;
// Base URL of the v1 API; endpoint paths such as /documents/<id> are appended to it.
const API_URL = 'https://api.docurift.com/v1';

/**
 * Retrieve a document by ID.
 *
 * @param {string} documentId - Document ID (format: doc_xxxxx).
 * @param {Object} [options]
 * @param {boolean} [options.includeRawText] - Include raw OCR text in the response.
 * @param {boolean} [options.includePageImages] - Include signed URLs for page thumbnail images.
 * @returns {Promise<Object>} The parsed JSON response body.
 * @throws {Error} If the response status is not 2xx. The message comes from the
 *   API error body when one is available, otherwise from the HTTP status.
 */
async function getDocument(documentId, options = {}) {
  // Only send the flags that are switched on; the API defaults both to false.
  const params = new URLSearchParams();
  if (options.includeRawText) {
    params.append('includeRawText', 'true');
  }
  if (options.includePageImages) {
    params.append('includePageImages', 'true');
  }

  // Percent-encode the ID so characters such as "/" or "?" cannot alter the request path.
  const query = params.toString();
  const url = `${API_URL}/documents/${encodeURIComponent(documentId)}${query ? '?' + query : ''}`;

  const response = await fetch(url, {
    headers: {
      'X-API-Key': API_KEY
    }
  });

  if (!response.ok) {
    // Start from the HTTP status so there is always a meaningful message,
    // even when the body is not the expected JSON error envelope.
    let message = `Request failed with status ${response.status}`;
    try {
      const body = await response.json();
      message = body?.error?.message ?? message;
    } catch {
      // Non-JSON error body (e.g. from a proxy): keep the status-based message.
    }
    throw new Error(message);
  }

  return response.json();
}

// Example usage
// Uses top-level await: run this as an ES module, or wrap it in an async function.
const result = await getDocument('doc_abc123xyz456');
// The document itself is nested under the top-level "data" key of the response.
const document = result.data;

console.log('Document ID:', document.id);
console.log('Status:', document.status);
// confidence is null for documents that have not completed.
console.log('Confidence:', document.confidence);

// extractedData is null unless the status is 'completed', so check the status first.
if (document.status === 'completed') {
console.log('Extracted Data:', document.extractedData);
}

Response

Success Response (200 OK) - Completed Document

response_completed.json

{
"success": true,
"data": {
  "id": "doc_abc123xyz456",
  "organizationId": "org_xyz789",
  "fileName": "invoice.pdf",
  "fileType": "application/pdf",
  "fileSize": 245678,
  "documentType": "invoice",
  "status": "completed",
  "pagesProcessed": 2,
  "confidence": 0.96,
  "extractedData": {
    "invoiceNumber": "INV-2024-00123",
    "invoiceDate": "2024-01-15",
    "dueDate": "2024-02-15",
    "currency": "USD",
    "vendor": {
      "name": "Acme Shipping Co.",
      "address": "123 Harbor Blvd, Los Angeles, CA 90021",
      "taxId": "12-3456789",
      "email": "billing@acmeshipping.com"
    },
    "customer": {
      "name": "Global Imports Inc.",
      "address": "456 Trade St, New York, NY 10001"
    },
    "lineItems": [
      {
        "description": "Ocean Freight - Container 20ft",
        "quantity": 2,
        "unitPrice": 1500.00,
        "total": 3000.00
      }
    ],
    "subtotal": 3000.00,
    "taxAmount": 240.00,
    "totalAmount": 3240.00
  },
  "metadata": {
    "processingTimeMs": 2340,
    "modelVersion": "v2.1.0",
    "pageConfidences": [0.97, 0.95],
    "extractionDetails": {
      "tablesExtracted": 1,
      "fieldsExtracted": 18
    }
  },
  "createdAt": "2024-01-26T10:30:00Z",
  "processedAt": "2024-01-26T10:30:02Z"
}
}

Success Response - Processing Document

response_processing.json

{
"success": true,
"data": {
  "id": "doc_abc123xyz456",
  "organizationId": "org_xyz789",
  "fileName": "large-document.pdf",
  "fileType": "application/pdf",
  "fileSize": 5234567,
  "documentType": "invoice",
  "status": "processing",
  "pagesProcessed": 0,
  "confidence": null,
  "extractedData": null,
  "estimatedCompletionTime": "2024-01-26T10:35:00Z",
  "progress": {
    "currentPage": 8,
    "totalPages": 25,
    "percentComplete": 32
  },
  "createdAt": "2024-01-26T10:30:00Z",
  "processedAt": null
}
}

Success Response - Failed Document

response_failed.json

{
"success": true,
"data": {
  "id": "doc_abc123xyz456",
  "organizationId": "org_xyz789",
  "fileName": "corrupted.pdf",
  "fileType": "application/pdf",
  "fileSize": 12345,
  "documentType": "invoice",
  "status": "failed",
  "pagesProcessed": 0,
  "confidence": null,
  "extractedData": null,
  "error": {
    "code": "PROCESSING_FAILED",
    "message": "Unable to parse PDF: file appears to be corrupted",
    "details": "PDF header is invalid or missing"
  },
  "createdAt": "2024-01-26T10:30:00Z",
  "processedAt": "2024-01-26T10:30:05Z"
}
}

Response with Raw Text

response_with_raw_text.json

{
"success": true,
"data": {
  "id": "doc_abc123xyz456",
  "status": "completed",
  "extractedData": { ... },
  "rawText": {
    "pages": [
      {
        "pageNumber": 1,
        "text": "COMMERCIAL INVOICE\n\nInvoice No: INV-2024-00123\nDate: January 15, 2024\n\nBill To:\nGlobal Imports Inc.\n456 Trade St\nNew York, NY 10001\n..."
      },
      {
        "pageNumber": 2,
        "text": "Page 2 content..."
      }
    ]
  }
}
}

Response Fields

Field	Type	Description
`id`	`string`	Unique document identifier
`organizationId`	`string`	Organization that owns this document
`fileName`	`string`	Original uploaded file name
`fileType`	`string`	MIME type of the file
`fileSize`	`number`	File size in bytes
`documentType`	`string`	Document type used for processing
`status`	`string`	Processing status: queued, processing, completed, failed
`pagesProcessed`	`number`	Number of pages processed
`confidence`	`number`	Overall extraction confidence (0-1), null if not completed
`extractedData`	`object`	Structured extracted data, null if not completed
`error`	`object`	Error details if status is failed
`progress`	`object`	Processing progress for async jobs
`metadata`	`object`	Processing metadata
`rawText`	`object`	Raw OCR text (if includeRawText=true)
`createdAt`	`string`	ISO 8601 timestamp of upload
`processedAt`	`string`	ISO 8601 timestamp of completion, null while the document is still processing

| Status | Description |
|--------|-------------|
| queued | Document is waiting in the processing queue |
| processing | Document is currently being processed |
| completed | Processing finished successfully |
| failed | Processing failed (see error field) |

Error Responses

401 Unauthorized

error_401.json

{
"success": false,
"error": {
  "code": "INVALID_API_KEY",
  "message": "Invalid API key"
}
}

403 Forbidden

error_403.json

{
"success": false,
"error": {
  "code": "FORBIDDEN",
  "message": "You do not have permission to access this document"
}
}

404 Not Found

error_404.json

{
"success": false,
"error": {
  "code": "DOCUMENT_NOT_FOUND",
  "message": "Document with ID 'doc_abc123xyz456' not found"
}
}

Error Codes Reference

| Code | HTTP Status | Description | Solution |
|------|-------------|-------------|----------|
| INVALID_API_KEY | 401 | API key invalid or expired | Verify API key |
| FORBIDDEN | 403 | No access to document | Check organization access |
| DOCUMENT_NOT_FOUND | 404 | Document does not exist | Verify document ID |

Best Practices

Polling for Async Documents

When waiting for async document processing, implement exponential backoff:

poll_with_backoff.py

import time

def wait_for_completion(document_id, max_wait=300):
    """Wait for document processing with exponential backoff.

    Polls ``get_document`` until the document is completed or failed. The
    delay between polls starts at 2 seconds and grows by 1.5x after every
    poll, capped at 30 seconds.

    Args:
        document_id: ID of the document to wait for.
        max_wait: Maximum wall-clock seconds to wait, including the time
            spent inside the requests themselves. The document is always
            polled at least once, and once more when the deadline is reached.

    Returns:
        The ``data`` object of the completed document.

    Raises:
        Exception: If processing failed; carries the API's error message.
        TimeoutError: If the document is not finished within ``max_wait``.
    """
    interval = 2  # Start with 2 seconds
    max_interval = 30  # Max 30 seconds between polls

    # A monotonic deadline measures real elapsed time (request latency
    # included) and is unaffected by system clock adjustments.
    deadline = time.monotonic() + max_wait

    while True:
        data = get_document(document_id)['data']
        status = data['status']

        if status == 'completed':
            return data
        if status == 'failed':
            raise Exception(data['error']['message'])

        # Show progress for long documents; the key may be absent or null.
        progress = data.get('progress')
        if progress:
            print(f"Processing: {progress['percentComplete']}% complete")

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break

        # Never sleep past the deadline, so the last sleep is always
        # followed by one final poll instead of being wasted.
        time.sleep(min(interval, remaining))
        interval = min(interval * 1.5, max_interval)

    raise TimeoutError("Document processing timed out")

Handling Different Statuses

handle_status.js

/**
 * Fetch a document and route it to the handler matching its processing status.
 *
 * - completed: hands the extracted data to processExtractedData
 * - processing / queued: logs the progress and schedules a retry
 * - failed: logs the error and hands it to handleProcessingError
 *
 * @param {string} documentId - ID of the document to handle.
 * @returns {Promise<*>} Whatever the selected handler returns.
 * @throws {Error} If the document reports a status not listed above.
 */
async function handleDocument(documentId) {
  const { data: doc } = await getDocument(documentId);

  // Finished successfully: process the extracted data.
  if (doc.status === 'completed') {
    return processExtractedData(doc.extractedData);
  }

  // Still in flight: report progress (0 when none is reported) and retry later.
  const isPending = doc.status === 'processing' || doc.status === 'queued';
  if (isPending) {
    console.log(`Document still processing: ${doc.progress?.percentComplete || 0}%`);
    // 5000 is presumably a delay in milliseconds; confirm against scheduleRetry.
    return scheduleRetry(documentId, 5000);
  }

  // Processing failed: surface the error details.
  if (doc.status === 'failed') {
    console.error(`Processing failed: ${doc.error.message}`);
    return handleProcessingError(doc.error);
  }

  throw new Error(`Unknown status: ${doc.status}`);
}

💡

Caching

Document results are immutable once completed. You can safely cache completed document responses on your end to reduce API calls.

Process Document (Sync) - Upload and process documents
Process Document (Async) - Async processing
List Documents - List all documents
Delete Document - Delete a document