This discussion has been locked.
You can no longer post new replies to this discussion. If you have a question you can start a new discussion

Running OCR scan on page through workflow

I am trying to run an OCR scan on a pdf that has been imported into M-Files via a mail import and then assigned to a workflow.
I am using the following code which is supposed to take the ocrzone and use it to go through the page however when running I get a expected end of state error.

Any assistance is appreciated


' Initialize the object version.
Dim oObjectVersion As MFilesAPI.ObjectVersion

' Check out the object first. We assume that initially the object is checked in.
Dim oObjectVersionCheckedOut As MFilesAPI.ObjectVersion
oObjectVersionCheckedOut = oVault.ObjectOperations.CheckOut(oObjectVersion.ObjVer.ObjID)

' Simply process all the files of the object.
Dim oObjectFiles As MFilesAPI.ObjectFiles = oVault.ObjectFileOperations.GetFiles(oObjectVersionCheckedOut.ObjVer)
For Each oObjectFile As MFilesAPI.ObjectFile In oObjectFiles

    ' Specify OCR options.
    Dim oOcrOptions As New MFilesAPI.OCROptions
    oOcrOptions.PrimaryLanguage = MFilesAPI.MFOCRLanguage.MFOCRLanguageFinnish
    oOcrOptions.SecondaryLanguage = MFilesAPI.MFOCRLanguage.MFOCRLanguageEnglishUS

    ' Specify an OCR zone to be recognized.
    Dim oOcrZone As New MFilesAPI.OCRZone
    oOcrZone.DataType = MFilesAPI.MFDataType.MFDatatypeText
    oOcrZone.DimensionUnit = MFilesAPI.MFOCRDimensionUnit.MFOCRDimensionUnitMillimeterX10
    oOcrZone.ID = 1
    oOcrZone.Left = 1580 ' This is interpreted as 158.0 mm.
    oOcrZone.Top = 470 ' This is interpreted as 47.0 mm.
    oOcrZone.Width = 900 ' This is interpreted as 90.0 mm.
    oOcrZone.Height = 130 ' This is interpreted as 13.0 mm.

    ' Construct an OCR page object and add the OCR zone to this OCR page.
    Dim oOcrPage As New MFilesAPI.OCRPage
    oOcrPage.OCRZones.Add(0, oOcrZone)

    ' Indicate that all zones contained by this OCR page are
    ' recognized on the page 1 of the source image.
    oOcrPage.PageNum = 1

    ' Construct an OCR page collection and add the OCR page to this collection.
    Dim oOcrPages As New MFilesAPI.OCRPages
    oOcrPages.Add(0, oOcrPage)

    ' Invoke the OCR operation for the target file by requesting
    ' 1) OCR zone recognition with specific OCR zones, and
    ' 2) conversion to a searchable PDF.
    Dim oOcrPageResults As MFilesAPI.OCRPageResults = _
    oVault.ObjectFileOperations.PerformOCROperation( _
        oObjectVersionCheckedOut.ObjVer, _
        oObjectFile.FileVer, _
        oOcrOptions, _
        MFilesAPI.MFOCRZoneRecognitionMode.MFOCRZoneRecognitionModeRecognizeSpecifiedZones, _
        oOcrPages, _
        True _
    )

    ' Process the OCR zone recognition results.
    For Each oOcrPageResult As MFilesAPI.OCRPageResult In oOcrPageResults
    For Each oOcrZoneResult As MFilesAPI.OCRZoneResult In oOcrPageResult.OCRZoneResults
        'set properties for ocr property to the ocr result
PropVal.PropertyDef = 1467 ' ocr test property definition ID
PropVal.TypedValue.SetValue MFDataTypeText, OCRPageResults
PropVals.Add -1, PropVal

Vault.ObjectPropertyOperations.SetProperties ObjVer, PropVals
    Next
    Next

Next

' Check in the object to finalize.
oVault.ObjectOperations.CheckIn(oObjectVersionCheckedOut.ObjVer)
Parents Reply Children
No Data