Skip to content

Commit

Permalink
process_batch() defaults table_structure_extractor. (#515)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexaryn committed Jul 7, 2024
1 parent 634c58a commit 63af7f1
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion lib/sycamore/sycamore/transforms/detr_partitioner.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -32,7 +32,7 @@
from sycamore.utils.image_utils import crop_to_bbox, image_to_bytes
from sycamore.utils.memory_debugging import display_top, gc_tensor_dump
from sycamore.utils.pdf import convert_from_path_streamed_batched
-from sycamore.utils.time_trace import LogTime
+from sycamore.utils.time_trace import LogTime, timetrace


def _batchify(iterable, n=1):
Expand Down Expand Up @@ -450,6 +450,8 @@ def process_batch(

if extract_table_structure:
with LogTime("extract_table_structure_batch"):
+if table_structure_extractor is None:
+table_structure_extractor = DEFAULT_TABLE_STRUCTURE_EXTRACTOR(device=self.device)
for i, page_elements in enumerate(deformable_layout):
image = batch[i]
for element in page_elements:
Expand Down Expand Up @@ -613,6 +615,7 @@ def extract(self, filename: Union[str, IOBase], hash_key: str, use_cache=False)
return pages


+@timetrace("OCR")
def extract_ocr(
images: list[Image.Image], elements: list[list[Element]], ocr_images=False, ocr_tables=False
) -> list[list[Element]]:
Expand Down

0 comments on commit 63af7f1

Please sign in to comment.