Query structured spec data via REST or MCP. Get exactly what your agent needs.
https://api.unstructured.io
http://localhost:8000
No endpoints found for this provider.
Elements
{
"type": "array",
"items": {
"Element": {
"type": "object",
"properties": {
"text": {},
"type": {},
"metadata": {},
"element_id": {}
}
}
}
}
HTTPValidationError
{
"type": "object",
"title": "HTTPValidationError",
"properties": {
"detail": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ValidationError"
},
"title": "Detail"
}
}
}
ValidationError
{
"type": "object",
"title": "ValidationError",
"required": [
"loc",
"msg",
"type"
],
"properties": {
"loc": {
"type": "array",
"items": {
"oneOf": [
{
"type": "string"
},
{
"type": "integer"
}
]
},
"title": "Location"
},
"msg": {
"type": "string",
"title": "Message"
},
"type": {
"type": "string",
"title": "Error Type"
}
}
}
partition_parameters
{
"type": "object",
"title": "Partition Parameters",
"properties": {
"files": {
"type": "string",
"format": "binary",
"example": {
"summary": "File to be partitioned",
"externalValue": "https://github.com/Unstructured-IO/unstructured/blob/98d3541909f64290b5efb65a226fc3ee8a7cc5ee/example-docs/layout-parser-paper.pdf"
},
"required": "true",
"description": "The file to extract"
},
"overlap": {
"type": "integer",
"title": "Intra-chunk overlap",
"example": 25,
"description": "A prefix of this many trailing characters from the prior text-split chunk is applied to second and later chunks formed from oversized elements by text-splitting. Default: None"
},
"encoding": {
"type": "string",
"title": "Encoding",
"example": "utf-8",
"description": "The encoding method used to decode the text input. Default: utf-8"
},
"strategy": {
"type": "string",
"title": "Strategy",
"example": "hi_res",
"description": "The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: auto"
},
"languages": {
"type": "array",
"items": {
"type": "string",
"example": "eng"
},
"title": "OCR Languages",
"default": [],
"example": "[eng]",
"description": "The languages present in the document, for use in partitioning and/or OCR"
},
"coordinates": {
"type": "boolean",
"title": "Coordinates",
"description": "If true, return coordinates for each element. Default: false"
},
"overlap_all": {
"type": "boolean",
"title": "Inter-chunk overlap",
"description": "When True, overlap is also applied to 'normal' chunks formed by combining whole elements. Use with caution as this can introduce noise into otherwise clean semantic units. Default: None"
},
"output_format": {
"type": "string",
"title": "Output Format",
"example": "application/json",
"description": "The format of the response. Supported formats are application/json and text/csv. Default: application/json."
},
"xml_keep_tags": {
"type": "boolean",
"title": "Xml Keep Tags",
"description": "If True, will retain the XML tags in the output. Otherwise it will simply extract the text from within the tags. Only applies to partition_xml."
},
"max_characters": {
"type": "integer",
"title": "Max Characters",
"example": 1500,
"description": "If chunking strategy is set, cut off new sections after reaching a length of n chars (hard max). Default: 500"
},
"chunking_strategy": {
"type": "string",
"title": "Chunking Strategy",
"example": "by_title",
"description": "Use one of the supported strategies to chunk the returned elements. Currently supports: by_title"
},
"hi_res_model_name": {
"type": "string",
"title": "Hi Res Model Name",
"example": "yolox",
"description": "The name of the inference model used when strategy is hi_res"
},
"new_after_n_chars": {
"type": "integer",
"title": "New after n chars",
"example": 1500,
"description": "If chunking strategy is set, cut off new sections after reaching a length of n chars (soft max). Default: max_characters (off)"
},
"multipage_sections": {
"type": "boolean",
"title": "Multipage Sections",
"description": "If chunking strategy is set, determines if sections can span multiple pages. Only applies to by_title chunking strategy.Default: true"
},
"unique_element_ids": {
"type": "boolean",
"title": "Unique element IDs",
"description": "When True, assign UUIDs to element IDs, which guarantees their uniqueness (useful when using them as primary keys in database). Otherwise a SHA-256 of element text is used. Default: False"
},
"include_page_breaks": {
"type": "boolean",
"title": "Include Page Breaks",
"description": "If True, the output will include page breaks if the filetype supports it. Default: false"
},
"combine_under_n_chars": {
"type": "integer",
"title": "Combine Under N Chars",
"example": 500,
"description": "If chunking strategy is set, combine elements until a section reaches a length of n chars. Default: max_characters"
},
"include_orig_elements": {
"type": "boolean",
"title": "Original-elements flag",
"description": "When True (the default), the elements used to form a chunk appear in `.metadata.orig_elements` for that chunk. Only applies when chunking is specified using the `chunking_strategy` argument."
},
"skip_infer_table_types": {
"type": "array",
"items": {
"type": "string",
"example": "pdf"
},
"title": "Skip Infer Table Types",
"description": "The document types that you want to skip table extraction with. Default: []"
},
"extract_image_block_types": {
"type": "array",
"items": {
"type": "string",
"example": "image"
},
"title": "Image block types to extract",
"default": [],
"example": [
"image",
"table"
],
"description": "The types of elements to extract, for use in extracting image blocks as base64 encoded data stored in metadata fields"
},
"pdf_infer_table_structure": {
"type": "boolean",
"title": "Pdf Infer Table Structure",
"description": "Deprecated! Use skip_infer_table_types to opt out of table extraction for any file type. If False and strategy=hi_res, no Table Elements will be extracted from pdf files regardless of skip_infer_table_types contents."
},
"gz_uncompressed_content_type": {
"type": "string",
"title": "Uncompressed Content Type",
"example": "application/pdf",
"description": "If file is gzipped, use this content type after unzipping"
}
}
}