ter = array_value($config, 'filter');
    $arr = array_value($filter, $type);
    $enable = array_value($arr, 'enable');
    $wordarr = array_value($arr, 'keyword');

    if (0 == $enable || empty($wordarr)) return FALSE;

    foreach ($wordarr as $_keyword) {
        if (!$_keyword) continue;
        $r = strpos(strtolower($keyword), strtolower($_keyword));
        if (FALSE !== $r) {
            $error = $_keyword;
            return TRUE;
        }
    }
    return FALSE;
}

/**
 * Build the scheme-and-host prefix of the current request,
 * e.g. http://domain.com or https://domain.com.
 *
 * HTTPS is detected from $_SERVER['HTTPS'] being 'on' or from
 * $_SERVER['HTTP_X_FORWARDED_PROTO'] being 'https'.
 *
 * @return string Scheme followed by $_SERVER['HTTP_HOST']
 */
function url_prefix()
{
    $is_https = isset($_SERVER['HTTPS']) && 'on' == $_SERVER['HTTPS'];
    if (!$is_https) {
        // Fall back to the header commonly set by a TLS-terminating proxy
        $is_https = isset($_SERVER['HTTP_X_FORWARDED_PROTO']) && $_SERVER['HTTP_X_FORWARDED_PROTO'] == 'https';
    }

    $scheme = $is_https ? 'https://' : 'http://';
    return $scheme . $_SERVER['HTTP_HOST'];
}

/**
 * Generate a unique identifier.
 *
 * The result is uniqid() prefixed with 8 hex characters taken from the MD5 of
 * the current microtime concatenated with a random 4-digit number.
 *
 * @return string
 */
function uniq_id()
{
    $seed = microtime(true) . mt_rand(1000, 9999);
    $prefix = substr(md5($seed), 8, 8);
    return uniqid($prefix);
}

/**
 * Generate a 14-digit order number from the current time.
 *
 * The number is the Unix timestamp in seconds followed by the first four
 * digits of the fractional second, right-padded with '0' and cut to 14
 * characters so the length is always exactly 14.
 *
 * The string form of microtime() is used on purpose: casting the float form
 * to a string depends on the `precision` ini setting, which made the previous
 * implementation return more than 14 digits on some configurations.
 *
 * @return string 14 decimal digits
 */
function trade_no()
{
    // microtime() returns "0.usec sec"
    list($fraction, $seconds) = explode(' ', microtime());

    $digits = $seconds . substr($fraction, 2, 4);

    return str_pad(substr($digits, 0, 14), 14, '0');
}

/**
 * Generate a 16-digit order number: the Unix timestamp in seconds followed by
 * the six microsecond digits reported by microtime().
 *
 * @return string
 */
function trade_no_16()
{
    // microtime() returns "0.usec sec"
    list($fraction, $seconds) = explode(' ', microtime());

    return $seconds . mb_substr($fraction, 2, 6, 'UTF-8');
}

/**
 * Number of days in the year of the given timestamp (365 or 366).
 *
 * @param int|null $time Unix timestamp; the current time is used when it evaluates to 0
 * @return int
 */
function date_year($time = NULL)
{
    if (!intval($time)) {
        $time = time();
    }

    // date('L') is 1 in a leap year and 0 otherwise
    $leap = date('L', $time);
    return $leap + 365;
}

/**
 * Day of the year for the given timestamp, as returned by date('z') (starting at 0).
 *
 * @param int|null $time Unix timestamp; the current time is used when it evaluates to 0
 * @return string
 */
function date_z($time = NULL)
{
    if (!intval($time)) {
        $time = time();
    }

    return date('z', $time);
}

/**
 * Day of the month without leading zero (1 to 31) for the given timestamp.
 *
 * @param int|null $time Unix timestamp; the current time is used when it evaluates to 0
 * @return string
 */
function date_j($time = NULL)
{
    if (!intval($time)) {
        $time = time();
    }

    return date('j', $time);
}

/**
 * Day of the month as two digits with leading zero (01 to 31) for the given timestamp.
 *
 * @param int|null $time Unix timestamp; the current time is used when it evaluates to 0
 * @return string
 */
function date_d($time = NULL)
{
    if (!intval($time)) {
        $time = time();
    }

    return date('d', $time);
}

/**
 * Day of the week as a number, 1 (Monday) to 7 (Sunday), for the given timestamp.
 *
 * @param int|null $time Unix timestamp; the current time is used when it evaluates to 0
 * @return string
 */
function date_w_n($time = NULL)
{
    if (!intval($time)) {
        $time = time();
    }

    return date('N', $time);
}

/**
 * Week number of the year for the given timestamp, as returned by date('W').
 *
 * @param int|null $time Unix timestamp; the current time is used when it evaluates to 0
 * @return string
 */
function date_d_w($time = NULL)
{
    if (!intval($time)) {
        $time = time();
    }

    return date('W', $time);
}

/**
 * Month number without leading zero (1 to 12) for the given timestamp.
 *
 * @param int|null $time Unix timestamp; the current time is used when it evaluates to 0
 * @return string
 */
function date_n($time = NULL)
{
    if (!intval($time)) {
        $time = time();
    }

    return date('n', $time);
}

/**
 * Number of days in the month of the given timestamp.
 *
 * @param int|null $time Unix timestamp; the current time is used when it evaluates to 0
 * @return string
 */
function date_t($time = NULL)
{
    if (!intval($time)) {
        $time = time();
    }

    return date('t', $time);
}

/**
 * Unix timestamp of 00:00:00 today.
 *
 * @return int|false Result of strtotime() on today's date in Ymd form
 */
function clock_zero()
{
    $today = date('Ymd');
    return strtotime($today);
}

/**
 * Unix timestamp of 24:00:00 today, computed as today's midnight plus 86400 seconds.
 *
 * @return int
 */
function clock_twenty_four()
{
    $midnight = strtotime(date('Ymd'));
    return $midnight + 86400;
}

/**
 * Lifetime in seconds until the next 8 a.m. expiry.
 *
 * Before 08:00 the expiry is 08:00 today; from 08:00 onwards it is 08:00
 * tomorrow. The previous `<= 8` comparison treated the whole 08:00-08:59 hour
 * as "before 8" and returned a zero or negative lifetime during that hour.
 *
 * @param int|null $time Reference Unix timestamp the lifetime is measured from;
 *                       the current time is used when it evaluates to 0
 * @return int Seconds between $time and the expiry moment
 */
function eight_expired($time = NULL)
{
    $time = intval($time) ? $time : time();

    // 08:00 today = today's midnight + 8 hours (28800 seconds)
    $today_eight = strtotime(date('Ymd')) + 28800;

    if (intval(date('G')) < 8) {
        return $today_eight - $time;
    }

    // Already 08:00 or later: expire at 08:00 tomorrow
    return $today_eight + 86400 - $time;
}

/**
 * Lifetime in seconds until 24:00 today.
 *
 * @param int|null $time Reference Unix timestamp the lifetime is measured from;
 *                       the current time is used when it evaluates to 0
 * @return int Seconds between $time and clock_twenty_four()
 */
function twenty_four_expired($time = NULL)
{
    if (!intval($time)) {
        $time = time();
    }

    return clock_twenty_four() - $time;
}

/**
 * Perform an HTTP(S) request with cURL, falling back to https_post() / http_get()
 * (defined elsewhere) when the cURL extension is not loaded.
 *
 * @param string $url Request URL; an empty value makes the function return FALSE
 * @param string|array $post POST data (arrays are encoded with http_build_query);
 *                           empty for a GET request; the literal 'GET' returns the
 *                           final effective URL after redirects instead of the body
 * @param string|array $cookie Cookie header value (arrays are encoded with
 *                             http_build_query on the cURL path)
 * @param int $timeout Timeout, in seconds, or in milliseconds when $ms is truthy
 * @param int $ms Set to 1 to interpret $timeout as milliseconds
 * @return mixed Result of curl_exec(), the effective URL when $post is 'GET',
 *               FALSE for an empty URL, or the fallback helper's return value
 */
function https_request($url, $post = '', $cookie = '', $timeout = 30, $ms = 0)
{
    if (empty($url)) return FALSE;

    // On PHP older than 5.2.3 force a second-based 30s timeout
    if (version_compare(PHP_VERSION, '5.2.3', '<')) {
        $ms = 0;
        $timeout = 30;
    }

    is_array($post) and $post = http_build_query($post);

    // cURL is not installed: fall back to the plain HTTP helpers (POST is supported)
    if (!extension_loaded('curl')) {
        //throw new Exception('server not install CURL');
        if ($post) {
            return https_post($url, $post, $cookie, $timeout);
        } else {
            return http_get($url, $cookie, $timeout);
        }
    }

    // NOTE(review): http_build_query joins pairs with '&', not the '; ' separator of a Cookie header — confirm this is intended
    is_array($cookie) and $cookie = http_build_query($cookie);
    $curl = curl_init();
    // Return the response instead of printing it
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    // CURLOPT_SAFE_UPLOAD has different defaults in PHP 5.5 and PHP 5.6
    if (class_exists('\CURLFile')) {
        curl_setopt($curl, CURLOPT_SAFE_UPLOAD, true);
    } else {
        defined('CURLOPT_SAFE_UPLOAD') and curl_setopt($curl, CURLOPT_SAFE_UPLOAD, false);
    }
    // Request URL
    curl_setopt($curl, CURLOPT_URL, $url);
    // Header and redirect handling
    if (ini_get('safe_mode') && ini_get('open_basedir')) {
        // Only applies when $post is the literal 'GET'
        if ('GET' == $post) {
            // Under safe mode, include the response headers in the output
            curl_setopt($curl, CURLOPT_HEADER, true);
            // Under safe mode, request without downloading the body
            curl_setopt($curl, CURLOPT_NOBODY, true);
        }
    } else {
        curl_setopt($curl, CURLOPT_HEADER, false);
        // Allow up to 10 redirects
        curl_setopt($curl, CURLOPT_MAXREDIRS, 10);
        // Follow redirects automatically so the final location is reached
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    }
    // Forward the caller's User-Agent, or use a fixed mobile Safari one when none is present
    $ua1 = 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1';
    $ua = empty($_SERVER["HTTP_USER_AGENT"]) ? $ua1 : $_SERVER["HTTP_USER_AGENT"];
    curl_setopt($curl, CURLOPT_USERAGENT, $ua);
    // HTTPS support
    // NOTE(review): the check matches 'https://' anywhere in the URL, and certificate/host verification is switched off
    if (FALSE !== stripos($url, 'https://')) {
        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, FALSE);
        curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, FALSE);
        // SSL version control
        //curl_setopt($curl, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1);
        // NOTE(review): a boolean is passed where a CURL_SSLVERSION_* constant is expected — confirm the intended version
        curl_setopt($curl, CURLOPT_SSLVERSION, true);
    }

    $header = array('Content-type: application/x-www-form-urlencoded;charset=UTF-8', 'X-Requested-With: XMLHttpRequest');
    $cookie and $header[] = "Cookie: $cookie";
    curl_setopt($curl, CURLOPT_HTTPHEADER, $header);

    // NOTE(review): the sentinel $post = 'GET' is truthy, so this branch also runs for it and sends 'GET' as the POST body
    if ($post) {
        // POST
        curl_setopt($curl, CURLOPT_POST, true);
        // Set the Referer automatically when following redirects
        curl_setopt($curl, CURLOPT_AUTOREFERER, true);
        curl_setopt($curl, CURLOPT_POSTFIELDS, $post);
    }

    if ($ms) {
        curl_setopt($curl, CURLOPT_NOSIGNAL, true); // set alongside the millisecond timeout
        curl_setopt($curl, CURLOPT_TIMEOUT_MS, intval($timeout)); // timeout in milliseconds
    } else {
        curl_setopt($curl, CURLOPT_TIMEOUT, intval($timeout)); // timeout in seconds
    }
    // Disabled: force IPv4 resolution (original note: IPv6 is tried first, IPv4 after a timeout)
    //curl_setopt($curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
    curl_setopt($curl, CURLOPT_ENCODING, 'gzip');
    // Execute the request
    $output = curl_exec($curl);
    // For $post = 'GET' return the effective (final) URL rather than the page content
    // (original note: CURLOPT_RETURNTRANSFER must be false)
    'GET' == $post and $output = curl_getinfo($curl, CURLINFO_EFFECTIVE_URL);
    curl_close($curl);
    return $output;
}

/**
 * Download a resource (typically an image) with cURL and return its raw content.
 *
 * NOTE(review): TLS peer and host verification are disabled here.
 *
 * @param string $img URL of the resource to fetch
 * @return string|bool Result of curl_exec()
 */
function save_image($img)
{
    $handle = curl_init();

    $options = array(
        // Request URL
        CURLOPT_URL => $img,
        // Return the body as a string instead of echoing it
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_SSL_VERIFYHOST => false,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_ENCODING => 'gzip',
    );
    foreach ($options as $option => $value) {
        curl_setopt($handle, $option, $value);
    }

    $body = curl_exec($handle);
    curl_close($handle);

    return $body;
}

/**
 * Compute the X offset used to centre a TrueType string on a background.
 *
 * @param float $size Font size
 * @param string $str Text to measure
 * @param string $font Path to the TrueType font file
 * @param int|float $width Width of the background
 * @param int|float $multiple Divisor applied to the remaining width (2 centres the text)
 * @return int|float ($width - upper-right X - upper-left X) / $multiple
 */
function calculate_str_width($size, $str, $font, $width, $multiple = 2)
{
    $bbox = imagettfbbox($size, 0, $font, $str);

    // Index 4 is the upper-right corner X, index 6 the upper-left corner X
    $upper_right_x = $bbox[4];
    $upper_left_x = $bbox[6];

    return ($width - $upper_right_x - $upper_left_x) / $multiple;
}

/**
 * Recursively scan a directory and echo every file whose extension is one of
 * php, asp, jsp, cgi, exe or dll.
 *
 * Nothing happens when $path is not a directory. The extension is obtained
 * from file_ext(), which is defined elsewhere.
 *
 * @param string $path Directory to scan
 * @return void
 */
function search_directory($path)
{
    if (!is_dir($path)) {
        return;
    }

    $flagged = array('php', 'asp', 'jsp', 'cgi', 'exe', 'dll');

    $entries = scandir($path);
    foreach ($entries as $entry) {
        if ('.' == $entry || '..' == $entry) {
            continue;
        }

        $full = $path . '/' . $entry;

        if (is_dir($full)) {
            // Descend into sub-directories
            search_directory($full);
            continue;
        }

        // Regular file: compare its lower-cased extension with the flagged list
        $extension = strtolower(file_ext($full));
        if (in_array($extension, $flagged, TRUE)) {
            echo '异常文件：' . $full . ' <hr/>';
        }
    }
}

/**
 * Turn a one-dimensional array into two "key=value&key=value" strings.
 *
 * The 'sign' key is removed, the remaining keys are sorted with ksort(), and
 * entries whose value is empty() are skipped.
 *
 * @param array $arr One-dimensional array of parameters
 * @param string $sign Receives the raw string to be signed
 * @param string $url Receives the urlencode()d query string, passed through htmlspecialchars()
 * @return void
 * @throws Exception When $arr contains nested arrays
 */
function array_to_string($arr, &$sign = '', &$url = '')
{
    // A recursive count larger than the flat count means there are nested arrays
    if (count($arr, 1) != count($arr)) {
        throw new Exception('Does not support multi-dimensional array to string');
    }

    // The signature itself never takes part in signing
    unset($arr['sign']);

    ksort($arr);

    $sign = '';
    $url = '';
    foreach ($arr as $name => $value) {
        if (!empty($value) && !is_array($value)) {
            $url .= $name . '=' . urlencode($value) . '&';
            $sign .= $name . '=' . $value . '&';
        }
    }

    // Drop the trailing '&' from both strings
    $url = htmlspecialchars(substr($url, 0, -1));
    $sign = substr($sign, 0, -1);
}

/**
 * Sign data with an RSA private key and return the Base64-encoded signature.
 *
 * @param string $data Data to sign
 * @param string $key Private key body without PEM header/footer; it is wrapped at
 *                    64 characters and enclosed in RSA PRIVATE KEY markers
 * @param string $sign_type 'RSA2' signs with SHA256, anything else with SHA1
 * @return string Base64-encoded signature
 * @throws Exception When OpenSSL or SHA256 support is missing, or when signing
 *                   fails (for example because the key cannot be parsed)
 */
function rsa_create_sign($data, $key, $sign_type = 'RSA')
{
    if (!function_exists('openssl_sign')) throw new Exception('OpenSSL extension is not enabled');

    if (!defined('OPENSSL_ALGO_SHA256')) throw new Exception('Only versions above PHP 5.4.8 support SHA256');

    $key = wordwrap($key, 64, "\n", true);
    if (FALSE === $key) throw new Exception('Private Key Error');

    $pem = "-----BEGIN RSA PRIVATE KEY-----\n$key\n-----END RSA PRIVATE KEY-----";

    $algorithm = 'RSA2' == $sign_type ? OPENSSL_ALGO_SHA256 : OPENSSL_ALGO_SHA1;

    // openssl_sign() returns false on failure and leaves the signature unset;
    // previously that failure was ignored and an empty signature was returned.
    $sign = '';
    if (!openssl_sign($data, $sign, $pem, $algorithm)) {
        throw new Exception('Private Key Error');
    }

    return base64_encode($sign);
}

/**
 * Verify a Base64-encoded RSA signature with a public key.
 *
 * @param string $data Data that was signed
 * @param string $sign Base64-encoded signature
 * @param string $key Public key body without PEM header/footer; it is wrapped at
 *                    64 characters and enclosed in PUBLIC KEY markers
 * @param string $sign_type 'RSA2' verifies with SHA256, anything else with SHA1
 * @return bool TRUE only when openssl_verify() returns 1
 * @throws Exception When wordwrap() fails on the key
 */
function rsa_verify_sign($data, $sign, $key, $sign_type = 'RSA')
{
    $wrapped = wordwrap($key, 64, "\n", true);
    if (FALSE === $wrapped) {
        throw new Exception('Public Key Error');
    }

    $pem = "-----BEGIN PUBLIC KEY-----\n$wrapped\n-----END PUBLIC KEY-----";

    $algorithm = ('RSA2' == $sign_type) ? OPENSSL_ALGO_SHA256 : OPENSSL_ALGO_SHA1;

    // openssl_verify(): 1 = valid signature, 0 = invalid, -1/false = error
    return 1 === openssl_verify($data, base64_decode($sign), $pem, $algorithm);
}

/**
 * Convert a flat array into an <xml> document,
 * e.g. array('appid' => 'appid', 'code' => 'success').
 *
 * Numeric values are written as plain text; every other value is wrapped in a
 * CDATA section. A "]]>" sequence inside a value is split across two CDATA
 * sections so it cannot terminate the section early and corrupt the document.
 *
 * @param array $arr Non-empty array of element name => value
 * @return string
 * @throws Exception When $arr is not an array or is empty
 */
function array_to_xml($arr)
{
    if (!is_array($arr) || empty($arr)) throw new Exception('Array Error');

    $xml = "<xml>";
    foreach ($arr as $key => $val) {
        if (is_numeric($val)) {
            $xml .= "<" . $key . ">" . $val . "</" . $key . ">";
        } else {
            // "]]>" would close the CDATA section, so emit it as "]]" + new section + ">"
            $safe = str_replace(']]>', ']]]]><![CDATA[>', (string)$val);
            $xml .= "<" . $key . "><![CDATA[" . $safe . "]]></" . $key . ">";
        }
    }
    $xml .= "</xml>";
    return $xml;
}

/**
 * Parse an XML string into an array (top-level elements become keys).
 *
 * External entity loading is disabled while parsing on PHP versions before 8.0;
 * libxml_disable_entity_loader() is deprecated from PHP 8.0 on, so it is not
 * called there. Parse errors are collected internally instead of being emitted
 * as warnings, and a document that cannot be parsed raises an exception
 * instead of being returned as array(false).
 *
 * @param string $xml XML document
 * @return array
 * @throws Exception When $xml is empty or cannot be parsed
 */
function xml_to_array($xml)
{
    if (!$xml) throw new Exception('XML error');

    $toggle_loader = PHP_VERSION_ID < 80000 && function_exists('libxml_disable_entity_loader');
    $old = $toggle_loader ? libxml_disable_entity_loader(true) : NULL;
    $previous_errors = libxml_use_internal_errors(true);

    // Parse; CDATA sections are merged into text nodes
    $element = simplexml_load_string($xml, 'SimpleXMLElement', LIBXML_NOCDATA | LIBXML_COMPACT);

    // Restore the previous libxml state; only discard errors this call collected
    if (!$previous_errors) libxml_clear_errors();
    libxml_use_internal_errors($previous_errors);
    if (FALSE === $old) libxml_disable_entity_loader(false);

    if (FALSE === $element) throw new Exception('XML error');

    return (array)$element;
}

/**
 * Read a file line by line as a generator, yielding each line trimmed.
 *
 * The loop stops as soon as fgets() reports end of file, so no phantom empty
 * line is produced after the last real line (the previous feof()-driven loop
 * yielded one when the file ended with a newline or was empty). The handle is
 * closed in a finally block, so it is released even when the consumer stops
 * iterating early.
 *
 * @param string $file Path of the file to read
 * @return Generator Yields one trimmed string per line; yields nothing when the file cannot be opened
 */
function well_import($file)
{
    $handle = fopen($file, 'r');
    if (FALSE === $handle) return;

    try {
        while (FALSE !== ($line = fgets($handle))) {
            yield trim($line);
        }
    } finally {
        fclose($handle);
    }
}

/**
 * Count the lines produced by well_import() for a file, with caching.
 *
 * The result is memoised per $key in a static array for the current request
 * and, when non-zero, stored through cache_set() for 300 seconds.
 * NOTE(review): the cache is keyed by $key only, not by $file — confirm that
 * callers pass a distinct key per file.
 *
 * @param string $file Path of the file to count
 * @param string $key Cache key
 * @return int|mixed Line count, or whatever cache_get() returned for $key
 */
function well_import_total($file, $key = 'well_import_total')
{
    static $cache = array();

    if (isset($cache[$key])) {
        return $cache[$key];
    }

    $count = cache_get($key);
    if (NULL === $count) {
        // Not cached yet: walk the generator and count its items
        $count = 0;
        foreach (well_import($file) as $unused_line) {
            ++$count;
        }

        if ($count) {
            cache_set($key, $count, 300);
        }
    }

    $cache[$key] = $count;
    return $count;
}

// Accumulator shared by all well_search_dir() calls; FALSE until the first call
$g_dir_file = FALSE;

/**
 * Recursively collect the paths of all files below a directory.
 *
 * Results are appended to the global $g_dir_file array, so repeated calls in
 * the same request keep accumulating into the same list.
 *
 * @param string $path Directory to walk
 * @return array The accumulated list of file paths
 */
function well_search_dir($path)
{
    global $g_dir_file;

    if (FALSE === $g_dir_file) {
        $g_dir_file = array();
    }

    if (!is_dir($path)) {
        return $g_dir_file;
    }

    $entries = scandir($path);
    foreach ($entries as $entry) {
        if ('.' == $entry || '..' == $entry) {
            continue;
        }

        $full = $path . '/' . $entry;
        if (is_dir($full)) {
            // Sub-directory: recurse, results land in the shared accumulator
            well_search_dir($full);
        } else {
            $g_dir_file[] = $full;
        }
    }

    return $g_dir_file;
}


?><!DOCTYPE HTML><html lang="zh-cn"><head><meta charset="UTF-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=0, minimum-scale=1.0, maximum-scale=1.0"><title>vba - Open PDF&amp;#39;s as Word docs and add sections as cells in excel - Stack Overflow</title><meta name="referrer" content="origin-when-cross-origin"><meta name="applicable-device" content="pc,mobile" /><meta name="MobileOptimized" content="width" /><meta name="HandheldFriendly" content="true" /><meta name="renderer" content="webkit" /><meta name="keywords" content="vba - Open PDF&amp;#39;s as Word docs and add sections as cells " /><meta name="description" content="vba - Open PDF&amp;#39;s as Word docs and add sections as cells in excel - Stack Overflow" /><link rel=stylesheet href="/view/template/d8/css/style.css?2.3.0" media=all><link rel=stylesheet href="/view/template/d8/css/library.css?2.3.0" media=all><link rel=stylesheet href="/view/template/d8/css/global.css?2.3.0" media=all><script type=application/ld+json>{
           "@content":"https://ziyuan.baidu.com/contexts/cambrian.jsonld",
           "@id":"http://www.815494.com/programmer/1744028635a2333477.html",
           "title":"vba - Open PDF's as Word docs and add sections as cells in excel - Stack Overflow",
           "images": ["http://www.815494.com/uploads/image/0780.jpg"],
           "description":"vba - Open PDF's as Word docs and add sections as cells in excel - Stack Overflow",
           "pubDate":"2025-04-07T00:04:00",
           "upDate":"2025-04-07T00:04:00"
        }
</script><script>
        window._deel={
           "ajaxpager":"","commenton":0,
           "roll":["1",""],
           "tougaoContentmin":"200",
           "tougaoContentmax":"5000",
           "texts":{
               "T101":"回顶部","T102":"发评论",
               "T103":"数据加载中...","T104":"下一页",
               "T110":"正在提交, 请稍候...","T111":"提交成功",
               "T112":"取消编辑","T120":"服务器忙，请稍候重试！",
               "T121":"投稿成功，审核通过后将正式发布！","T122":"投稿失败，请稍候重试！",
               "T123":"标题不能为空，且不能大于40个字符！","T124":"网址不能为空，且不能大于100个字符！",
               "T125":"内容不能为空，且介于","T126":"个字符之间！","T127":"标题太短，不得少于8字！",
               "T128":"标题太长，不得超过30字！","T129":"格式错误！","T130":"内容太短，不得少于","T131":
                   "内容太长，不得超过","T132":"字"
            }
        }
    </script></head><body class="home blog"><div class="navbar-wrap"><div class="navbar"><h1 class="logo"><a href="/" title="科技改变生活-雨落星辰 - 所有的伟大,都源于一个勇敢的开始">科技改变生活-雨落星辰 - 所有的伟大,都源于一个勇敢的开始</a></h1><ul class="nav"><li class="menu-item menu-item-type-custom menu-item-object-custom current-menu-item current_page_item menu-item-home menu-item-19"><a href="/" aria-current="page">首页</a></li><li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-17"><a href="/operation">运维笔记</a></li><li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-17"><a href="/seo">SEO心得</a></li><li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-17"><a href="/soft">软件程序</a></li><li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-17"><a href="/web">网站源码</a></li><li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-17"><a href="/html">旗下网站</a></li><li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-17"><a href="/programmer">programmer</a></li></ul><div class="menu pull-right"><form method="post" class="dropdown search-form" action="/operate/search.html"><input class="search-input" name="keyword" type="text" required="required" placeholder="输入关键字搜索" x-webkit-speech=""><input class="btn btn-success search-submit" type="submit" value="搜索"><ul class="dropdown-menu search-suggest"></ul></form><div class="btn-group pull-right"><button class="btn btn-primary dropdown-toggle" data-toggle="dropdown">
                    关注 <i class="caret"></i></button><ul class="dropdown-menu pull-right"><li><a href="#">新浪微博</a></li></ul></div></div></div></div><header class="header"><div class="speedbar"><div class="toptip"><strong>最新消息：</strong><span>雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客</span></div></div></header><section class="container"><div class="content-wrap"><div class="content"><div class="breadcrumbs">你的位置：
<a href="/">首页</a><small>&gt;</small><a href="/programmer">programmer</a><small>&gt;</small><span class="muted">vba - Open PDF&amp;#39;s as Word docs and add sections as cells in excel - Stack Overflow</span></div><header class="article-header"><h1 class="article-title"><a href="http://www.815494.com/programmer/1744028635a2333477.html">vba - Open PDF&amp;#39;s as Word docs and add sections as cells in excel - Stack Overflow</a></h1><div class="meta"><span class="muted"><a href="/programmer"><i class="icon-list-alt icon12"></i>programmer</a></span><span class="muted"><i class="icon-user icon12"></i><a href="/user/1.html">admin</a></span><time class="muted"><i class="ico icon-time icon12"></i>2025-04-07</time><span class="muted"><i class="ico icon-eye-open icon12"></i>0浏览</span><span class="muted"><i class="icon-comment icon12"></i><a href="#comments">0评论</a></span></div></header><article class="article-content"><p>I have a requirement that takes place on a frequent enough basis and takes days to complete. I have an excel document that needs to be updated with data from PDF files. They are just basic text with no major formatting other than a toc and Headings. The sections I want to pull out Have <em><strong>Heading 1</strong></em> that match the first and second column in the Excel file, Then they are in <strong>bold arial 12 pt:</strong> font followed by a colon in the pdf/word doc. That is the only place in the document that exists. I need everything between that until the next <em><strong>Heading 1</strong></em> / <strong>bold Arail 12 pt:</strong> So, everything between that goes in the matching row column in excel.</p>
<p>It will basically look like this:</p>
<pre class="lang-none prettyprint-override"><code>    Stuff - page 1
    TOC
    Header page
    First page
    Heading 1 - (AB) First Heading
    
    First section in bold arial 12: 
    
    Stuff between
    usually a numbered or bulleted list
    
     1. List item
     2. List item2
    
    Next Heading1 (CD) Second heading
    
    second section in bold arial 12: 
    
    Stuff between
    usually a numbered or bulleted list
    

 -  List item
 -  List item2
</code></pre>
<p>Then the next column, and so forth, until the next match to Heading 1 in the PDF/Word doc. The first 2 letters of each worksheet name match the 2 letters between the () in the name of the file. I'm looping through all the *.pdf files in a directory chosen from a browse-for-folder dialog. Here is my code so far:</p>
<pre><code>Sub ExtractFromPDF()
    Dim folderPath As String
    Dim fileName As String
    Dim wdApp As Object, wdDoc As Object
    Dim xlWS As Worksheet
    Dim header As String, extractedText As String
    Dim cell As Range
    Dim sectionA As String, sectionB As String
    Dim sheet As Worksheet
    Dim fileCode As String
    Dim missingPDFs As String

    missingPDFs = &quot;&quot;
    Debug.Print &quot;Starting ExtractFromPDF routine.&quot;

    ' Browse for folder path containing PDFs
    folderPath = BrowseForFolder(&quot;Select PDF Directory&quot;, &quot;C:\temp\pdffiles&quot;)
    If folderPath = &quot;&quot; Then Exit Sub
    Debug.Print &quot;Selected folder: &quot; &amp; folderPath

    ' Initialize Word Application
    On Error Resume Next
    Set wdApp = GetObject(, &quot;Word.Application&quot;)
    If wdApp Is Nothing Then Set wdApp = CreateObject(&quot;Word.Application&quot;)
    On Error GoTo ErrorHandler

    If wdApp Is Nothing Then
        MsgBox &quot;Microsoft Word is not installed or accessible.&quot;, vbCritical
        Exit Sub
    End If

    ' Allow breaking out of the loop
    Application.EnableCancelKey = xlErrorHandler

    ' Loop through PDF files
    fileName = Dir(folderPath &amp; &quot;\*.pdf&quot;)

    Do While fileName &lt;&gt; &quot;&quot;
        Debug.Print &quot;Processing file: &quot; &amp; fileName
        Set xlWS = Nothing

        ' Extract the two letters within parentheses from the file name
        fileCode = &quot;&quot;
        If InStr(fileName, &quot;(&quot;) &gt; 0 And InStr(fileName, &quot;)&quot;) &gt; 0 Then
            fileCode = Mid(fileName, InStr(fileName, &quot;(&quot;) + 1, 2)
            Debug.Print &quot;Extracted file code: &quot; &amp; fileCode
        Else
            missingPDFs = missingPDFs &amp; &quot;Invalid filename format: &quot; &amp; fileName &amp; vbCrLf
            Debug.Print &quot;Invalid filename format: &quot; &amp; fileName
            GoTo NextFile
        End If

        ' Find the matching worksheet by first two letters
        For Each sheet In ThisWorkbook.Sheets
            If UCase(Left(sheet.Name, 2)) = UCase(fileCode) Then
                Set xlWS = sheet
                Debug.Print &quot;Matched worksheet: &quot; &amp; sheet.Name
                Exit For
            End If
        Next sheet

        If xlWS Is Nothing Then
            missingPDFs = missingPDFs &amp; &quot;No matching worksheet for: &quot; &amp; fileName &amp; vbCrLf
            Debug.Print &quot;No matching worksheet for: &quot; &amp; fileName
            GoTo NextFile
        End If

        ' Open PDF with Word
        Set wdDoc = wdApp.Documents.Open(folderPath &amp; &quot;\&quot; &amp; fileName, ReadOnly:=True)

        ' Ensure Word document opened successfully
        If wdDoc Is Nothing Then
            missingPDFs = missingPDFs &amp; &quot;Failed to open in Word: &quot; &amp; fileName &amp; vbCrLf
            Debug.Print &quot;Failed to open Word document: &quot; &amp; fileName
            GoTo NextFile
        End If

        ' Process Word document content
        extractedText = &quot;&quot;

        Dim para As Object
        For Each para In wdDoc.Paragraphs
            Dim rng As Object
            Set rng = para.Range

            ' Check for Bold Arial 12 followed by colon
            If rng.Font.Bold = True And rng.Font.Name = &quot;Arial&quot; And rng.Font.Size = 12 Then
                If InStr(rng.Text, &quot;:&quot;) &gt; 0 Then
                    header = Trim(Split(rng.Text, &quot;:&quot;)(0))
                    extractedText = Trim(Mid(rng.Text, InStr(rng.Text, &quot;:&quot;) + 1))
                    Debug.Print &quot;Extracted header: &quot; &amp; header
                    Debug.Print &quot;Extracted text: &quot; &amp; extractedText

                    ' Parse section A and B from header
                    If InStr(header, &quot; &quot;) &gt; 0 Then
                        sectionA = Trim(Split(header, &quot; &quot;)(0))
                        sectionB = Trim(Mid(header, InStr(header, &quot; &quot;) + 1))
                    Else
                        sectionA = header
                        sectionB = &quot;&quot;
                    End If

                    ' Match section A + space + B in Excel
                    Dim rowNum As Long
                    rowNum = 1
                    Do While xlWS.Cells(rowNum, 1).Value &lt;&gt; &quot;&quot;
                        Dim combinedAB As String
                        combinedAB = Trim(xlWS.Cells(rowNum, 1).Value) &amp; &quot; &quot; &amp; Trim(xlWS.Cells(rowNum, 1).Value)

                        ' Print current content being matched
                        Debug.Print &quot;Matching PDF: &quot; &amp; extractedText &amp; &quot; with Excel: &quot; &amp; combinedAB

                        If NormalizeText(combinedAB) = NormalizeText(sectionA &amp; &quot; &quot; &amp; sectionB) Then
                            ' Compare and display if different (ignoring punctuation, spaces, and line breaks)
                            If NormalizeText(xlWS.Cells(rowNum, 3).Value) &lt;&gt; NormalizeText(extractedText) Then
                                Debug.Print &quot;Discrepancy in &quot; &amp; xlWS.Cells(rowNum, 3).Address &amp; &quot; (Sheet: &quot; &amp; xlWS.Name &amp; &quot;): &quot; &amp; extractedText
                            End If
                            Exit Do
                        End If
                        rowNum = rowNum + 1
                    Loop
                End If
            End If
        Next para

        ' Close Word Document
        wdDoc.Close False

NextFile:
        ' Next PDF
        fileName = Dir
    Loop

    ' Report missing PDFs
    If missingPDFs &lt;&gt; &quot;&quot; Then
        MsgBox &quot;Issues encountered: &quot; &amp; vbCrLf &amp; missingPDFs, vbExclamation
    End If

    ' Cleanup
    If Not wdApp Is Nothing Then wdApp.Quit
    Set wdApp = Nothing
    MsgBox &quot;Processing Complete!&quot;
    Debug.Print &quot;Processing complete.&quot;

ExitSub:
    If Not wdApp Is Nothing Then wdApp.Quit
    Set wdApp = Nothing
    MsgBox &quot;Process Interrupted!&quot;
    Debug.Print &quot;Process interrupted.&quot;
    Exit Sub

ErrorHandler:
    Debug.Print &quot;Error encountered: &quot; &amp; Err.Number &amp; &quot; - &quot; &amp; Err.Description
    If Err.Number = 18 Then Resume ExitSub
    MsgBox &quot;Error: &quot; &amp; Err.Description
    Resume Next
End Sub

Function NormalizeText(ByVal txt As String) As String
    Dim regex As Object
    Set regex = CreateObject(&quot;VBScript.RegExp&quot;)

    regex.Pattern = &quot;[^a-zA-Z0-9()\-]&quot;
    regex.Global = True
    txt = regex.Replace(txt, &quot;&quot;)

    NormalizeText = txt
End Function


Function BrowseForFolder(prompt As String, Optional defaultPath As String = &quot;&quot;) As String
    Dim shellApp As Object
    Set shellApp = CreateObject(&quot;Shell.Application&quot;)
    Dim folder As Object

    ' Debug: Check if the defaultPath is passed and if it exists
    Debug.Print &quot;Initial defaultPath: &quot; &amp; defaultPath
    
    ' Check if the defaultPath is valid and exists
    If defaultPath &lt;&gt; &quot;&quot; Then
        ' Ensure the path exists
        If Dir(defaultPath, vbDirectory) = &quot;&quot; Then
            MsgBox &quot;The specified default path does not exist: &quot; &amp; defaultPath, vbExclamation
            defaultPath = &quot;&quot; ' Reset to empty if invalid path
        Else
            ' Change drive and directory if path exists
            On Error Resume Next
            ChDrive Left(defaultPath, 1)
            ChDir defaultPath
            On Error GoTo 0
        End If
    End If

    ' Show folder browse dialog
    Set folder = shellApp.BrowseForFolder(0, prompt, 0)

    ' If a folder is selected, return the path
    If Not folder Is Nothing Then
        BrowseForFolder = folder.Self.Path
        Debug.Print &quot;Folder selected: &quot; &amp; BrowseForFolder
    Else
        ' If no folder is selected, return an empty string
        BrowseForFolder = &quot;&quot;
        Debug.Print &quot;No folder selected.&quot;
    End If
End Function


Sub ClearImmediateWindow()
    On Error GoTo ErrorHandler

    ' Attempt to reset the Immediate window
    Application.VBE.CommandBars(&quot;Immediate&quot;).Reset
    Exit Sub

ErrorHandler:
    ' Handle specific error here
    MsgBox &quot;Error occurred: &quot; &amp; Err.Description, vbCritical
End Sub
</code></pre>
<p>The problem I'm having is that the extractedText and header variables don't appear to be getting set. The Debug.Print statements for them never show in the Immediate window. I just get</p>
<pre><code>Matching PDF: 
 with Excel: Column1value Column2value
</code></pre>
<p>over and over again, going through all the columns but not matching to the section in the Word document.</p>
<p>Does anyone see anything I'm missing or a better way to do this? I know it's a bit complicated, so if any clarifications need to be made, feel free to ask in comments.</p>
    
                
<p>I have a task that comes up frequently and takes days to complete by hand. I have an Excel document that needs to be updated with data from PDF files. The PDFs are just basic text with no major formatting other than a TOC and headings. The sections I want to pull out have a <em><strong>Heading 1</strong></em> that matches the first and second columns in the Excel file, followed by a line in <strong>bold Arial 12 pt:</strong> font ending with a colon in the PDF/Word doc. That is the only place in the document where that formatting occurs. I need everything from there until the next <em><strong>Heading 1</strong></em> / <strong>bold Arial 12 pt:</strong> line, so everything in between goes in the matching row and column in Excel.</p>
<p>It will basically look like this:</p>
<pre class="lang-none prettyprint-override"><code>    Stuff - page 1
    TOC
    Header page
    First page
    Heading 1 - (AB) First Heading
    
    First section in bold arial 12: 
    
    Stuff between
    usually a numbered or bulleted list
    
     1. List item
     2. List item2
    
    Next Heading1 (CD) Second heading
    
    second section in bold arial 12: 
    
    Stuff between
    usually a numbered or bulleted list
    

 -  List item
 -  List item2
</code></pre>
<p>Then the next column, and so forth, until the next match to Heading 1 in the PDF/Word doc. The first 2 letters of each worksheet name match the 2 letters between the () in the name of the file. I'm looping through all the *.pdf files in a directory chosen from a browse-for-folder dialog. Here is my code so far:</p>
<pre><code>Sub ExtractFromPDF()
    ' Compares text pulled from PDFs (opened through Word) with worksheet cells.
    '
    ' For every *.pdf in a user-chosen folder:
    '   1. take the two characters after the opening parenthesis in the file name,
    '   2. find the worksheet whose name starts with those two characters,
    '   3. open the PDF in Word and visit every paragraph that is bold Arial 12
    '      and contains a colon; the part before the colon is the header, the
    '      part after it is the extracted text,
    '   4. look the header up against column A + space + column B of the sheet
    '      and report to the Immediate window when column C differs from the
    '      extracted text (comparison goes through NormalizeText).
    ' Files that cannot be processed are collected and shown in one message at
    ' the end. Ctrl+Break is routed to the error handler and ends the run.
    Dim folderPath As String
    Dim fileName As String
    Dim wdApp As Object, wdDoc As Object
    Dim wdCreated As Boolean
    Dim xlWS As Worksheet
    Dim header As String, extractedText As String
    Dim sectionA As String, sectionB As String
    Dim sheet As Worksheet
    Dim fileCode As String
    Dim missingPDFs As String

    missingPDFs = &quot;&quot;
    Debug.Print &quot;Starting ExtractFromPDF routine.&quot;

    ' Browse for folder path containing PDFs
    folderPath = BrowseForFolder(&quot;Select PDF Directory&quot;, &quot;C:\temp\pdffiles&quot;)
    If folderPath = &quot;&quot; Then Exit Sub
    Debug.Print &quot;Selected folder: &quot; &amp; folderPath

    ' Initialize Word Application.
    ' Reuse a running Word instance when there is one, otherwise start our own.
    ' wdCreated records which case applies so that cleanup never quits a Word
    ' session the user already had open.
    On Error Resume Next
    Set wdApp = GetObject(, &quot;Word.Application&quot;)
    If wdApp Is Nothing Then
        Set wdApp = CreateObject(&quot;Word.Application&quot;)
        wdCreated = Not (wdApp Is Nothing)
    End If
    On Error GoTo ErrorHandler

    If wdApp Is Nothing Then
        MsgBox &quot;Microsoft Word is not installed or accessible.&quot;, vbCritical
        Exit Sub
    End If

    ' Allow breaking out of the loop: a user interrupt is raised as error 18
    ' and handled in ErrorHandler below.
    Application.EnableCancelKey = xlErrorHandler

    ' Loop through PDF files
    fileName = Dir(folderPath &amp; &quot;\*.pdf&quot;)

    Do While fileName &lt;&gt; &quot;&quot;
        Debug.Print &quot;Processing file: &quot; &amp; fileName
        Set xlWS = Nothing

        ' Extract the two letters within parentheses from the file name
        fileCode = &quot;&quot;
        If InStr(fileName, &quot;(&quot;) &gt; 0 And InStr(fileName, &quot;)&quot;) &gt; 0 Then
            fileCode = Mid(fileName, InStr(fileName, &quot;(&quot;) + 1, 2)
            Debug.Print &quot;Extracted file code: &quot; &amp; fileCode
        Else
            missingPDFs = missingPDFs &amp; &quot;Invalid filename format: &quot; &amp; fileName &amp; vbCrLf
            Debug.Print &quot;Invalid filename format: &quot; &amp; fileName
            GoTo NextFile
        End If

        ' Find the matching worksheet by first two letters.
        ' Worksheets (not Sheets) is used because the loop variable is typed as
        ' Worksheet; Sheets can also contain chart sheets, which would raise a
        ' type mismatch here.
        For Each sheet In ThisWorkbook.Worksheets
            If UCase(Left(sheet.Name, 2)) = UCase(fileCode) Then
                Set xlWS = sheet
                Debug.Print &quot;Matched worksheet: &quot; &amp; sheet.Name
                Exit For
            End If
        Next sheet

        If xlWS Is Nothing Then
            missingPDFs = missingPDFs &amp; &quot;No matching worksheet for: &quot; &amp; fileName &amp; vbCrLf
            Debug.Print &quot;No matching worksheet for: &quot; &amp; fileName
            GoTo NextFile
        End If

        ' Open PDF with Word.
        ' Clear the reference first: if Open fails, ErrorHandler resumes on the
        ' next statement and the check below must see Nothing rather than the
        ' document left over from the previous file.
        Set wdDoc = Nothing
        Set wdDoc = wdApp.Documents.Open(folderPath &amp; &quot;\&quot; &amp; fileName, ReadOnly:=True)

        ' Ensure Word document opened successfully
        If wdDoc Is Nothing Then
            missingPDFs = missingPDFs &amp; &quot;Failed to open in Word: &quot; &amp; fileName &amp; vbCrLf
            Debug.Print &quot;Failed to open Word document: &quot; &amp; fileName
            GoTo NextFile
        End If

        ' Process Word document content
        extractedText = &quot;&quot;

        Dim para As Object
        For Each para In wdDoc.Paragraphs
            Dim rng As Object
            Set rng = para.Range

            ' Check for Bold Arial 12 followed by colon.
            ' NOTE(review): these Font properties describe the whole paragraph;
            ' a paragraph with mixed formatting will not satisfy the test.
            If rng.Font.Bold = True And rng.Font.Name = &quot;Arial&quot; And rng.Font.Size = 12 Then
                If InStr(rng.Text, &quot;:&quot;) &gt; 0 Then
                    header = Trim(Split(rng.Text, &quot;:&quot;)(0))
                    extractedText = Trim(Mid(rng.Text, InStr(rng.Text, &quot;:&quot;) + 1))
                    Debug.Print &quot;Extracted header: &quot; &amp; header
                    Debug.Print &quot;Extracted text: &quot; &amp; extractedText

                    ' Parse section A and B from header: A is the first word,
                    ' B is everything after the first space.
                    If InStr(header, &quot; &quot;) &gt; 0 Then
                        sectionA = Trim(Split(header, &quot; &quot;)(0))
                        sectionB = Trim(Mid(header, InStr(header, &quot; &quot;) + 1))
                    Else
                        sectionA = header
                        sectionB = &quot;&quot;
                    End If

                    ' Match section A + space + B in Excel, scanning column A
                    ' downwards until the first empty cell.
                    Dim rowNum As Long
                    rowNum = 1
                    Do While xlWS.Cells(rowNum, 1).Value &lt;&gt; &quot;&quot;
                        Dim combinedAB As String
                        ' Column A and column B joined by a space (previously
                        ' column A was read twice, so B was never compared).
                        combinedAB = Trim(xlWS.Cells(rowNum, 1).Value) &amp; &quot; &quot; &amp; Trim(xlWS.Cells(rowNum, 2).Value)

                        ' Print current content being matched
                        Debug.Print &quot;Matching PDF: &quot; &amp; extractedText &amp; &quot; with Excel: &quot; &amp; combinedAB

                        If NormalizeText(combinedAB) = NormalizeText(sectionA &amp; &quot; &quot; &amp; sectionB) Then
                            ' Compare and display if different (both sides pass through NormalizeText)
                            If NormalizeText(xlWS.Cells(rowNum, 3).Value) &lt;&gt; NormalizeText(extractedText) Then
                                Debug.Print &quot;Discrepancy in &quot; &amp; xlWS.Cells(rowNum, 3).Address &amp; &quot; (Sheet: &quot; &amp; xlWS.Name &amp; &quot;): &quot; &amp; extractedText
                            End If
                            Exit Do
                        End If
                        rowNum = rowNum + 1
                    Loop
                End If
            End If
        Next para

        ' Close Word Document without saving and drop the reference
        wdDoc.Close False
        Set wdDoc = Nothing

NextFile:
        ' Next PDF
        fileName = Dir
    Loop

    ' Report missing PDFs
    If missingPDFs &lt;&gt; &quot;&quot; Then
        MsgBox &quot;Issues encountered: &quot; &amp; vbCrLf &amp; missingPDFs, vbExclamation
    End If

    ' Cleanup: only quit Word when this routine started it
    If wdCreated Then wdApp.Quit
    Set wdApp = Nothing
    MsgBox &quot;Processing Complete!&quot;
    Debug.Print &quot;Processing complete.&quot;
    ' Leave here so a normal run does not fall through into the interrupt path
    Exit Sub

ExitSub:
    ' Reached only through Resume ExitSub after a user interrupt.
    ' Best-effort cleanup: a failure while closing must not mask the interrupt.
    On Error Resume Next
    If Not wdDoc Is Nothing Then wdDoc.Close False
    If wdCreated Then wdApp.Quit
    On Error GoTo 0
    Set wdDoc = Nothing
    Set wdApp = Nothing
    MsgBox &quot;Process Interrupted!&quot;
    Debug.Print &quot;Process interrupted.&quot;
    Exit Sub

ErrorHandler:
    Debug.Print &quot;Error encountered: &quot; &amp; Err.Number &amp; &quot; - &quot; &amp; Err.Description
    ' Error 18 is the user interrupt delivered because of xlErrorHandler
    If Err.Number = 18 Then Resume ExitSub
    MsgBox &quot;Error: &quot; &amp; Err.Description
    ' Any other error: report it and carry on with the following statement
    Resume Next
End Sub

Function NormalizeText(ByVal txt As String) As String
    ' Returns txt with every character removed that is not an ASCII letter,
    ' a digit, a parenthesis or a hyphen. Letter case is left untouched.
    With CreateObject(&quot;VBScript.RegExp&quot;)
        .Global = True                          ' replace all matches, not just the first
        .Pattern = &quot;[^a-zA-Z0-9()\-]&quot;
        NormalizeText = .Replace(txt, &quot;&quot;)
    End With
End Function


Function BrowseForFolder(prompt As String, Optional defaultPath As String = &quot;&quot;) As String
    ' Shows the shell folder picker titled with prompt and returns the chosen
    ' folder path, or an empty string when nothing was chosen.
    '
    ' When defaultPath is given and exists, the current drive and directory are
    ' switched to it before the dialog opens (failures there are ignored).
    ' When it does not exist, the user is told and defaultPath is blanked.
    Dim shellObj As Object
    Dim picked As Object

    Set shellObj = CreateObject(&quot;Shell.Application&quot;)

    ' Trace the incoming default path
    Debug.Print &quot;Initial defaultPath: &quot; &amp; defaultPath

    If Len(defaultPath) &gt; 0 Then
        If Len(Dir(defaultPath, vbDirectory)) &gt; 0 Then
            ' Path exists: make it the current drive and directory
            On Error Resume Next
            ChDrive Left(defaultPath, 1)
            ChDir defaultPath
            On Error GoTo 0
        Else
            ' Path is missing: warn and discard it
            MsgBox &quot;The specified default path does not exist: &quot; &amp; defaultPath, vbExclamation
            defaultPath = &quot;&quot;
        End If
    End If

    ' Open the folder picker
    Set picked = shellObj.BrowseForFolder(0, prompt, 0)

    If picked Is Nothing Then
        ' Nothing was picked
        BrowseForFolder = &quot;&quot;
        Debug.Print &quot;No folder selected.&quot;
    Else
        BrowseForFolder = picked.Self.Path
        Debug.Print &quot;Folder selected: &quot; &amp; BrowseForFolder
    End If
End Function


Sub ClearImmediateWindow()
    ' Calls Reset on the VBE command bar named Immediate and shows a critical
    ' message box with the error description if that call fails.
    ' NOTE(review): whether Reset actually clears the Immediate window is not
    ' established by this code - confirm before relying on it.
    On Error Resume Next

    Application.VBE.CommandBars(&quot;Immediate&quot;).Reset

    If Err.Number &lt;&gt; 0 Then
        MsgBox &quot;Error occurred: &quot; &amp; Err.Description, vbCritical
    End If
End Sub
</code></pre>
<p>The problem I'm having is that the extractedText and header variables don't appear to be getting set. Their Debug.Print lines never show in the Immediate window. I just get</p>
<pre><code>Matching PDF: 
 with Excel: Column1value Column2value
</code></pre>
<p>over and over again, going through all the columns but not matching to the section in the Word document.</p>
<p>Does anyone see anything I'm missing or a better way to do this? I know it's a bit complicated, so if any clarifications need to be made, feel free to ask in comments.</p>
    

                    <ul class='ml0 list-ls-none js-post-tag-list-wrapper d-inline'><li class='d-inline mr4 js-post-tag-list-item'>excel</li><li class='d-inline mr4 js-post-tag-list-item'>vba</li><li class='d-inline mr4 js-post-tag-list-item'>ms-word</li></ul>
                
            
            Share
        

                        Improve this question
                    

                    <button type="button"
                            id="btnFollowPost-79541816" class="s-btn s-btn__link js-follow-post js-follow-question js-gps-track"
                            data-gps-track="post.click({ item: 14, priv: 0, post_type: 1 })"
                            data-controller="s-tooltip " data-s-tooltip-placement="bottom"
                            data-s-popover-placement="bottom" aria-controls=""
                            title="Follow this question to receive notifications">
                        Follow
                        <input type="hidden" id="voteFollowHash" value="70:3:31e,16:2aa0f8a228b69125,10:1744028632,16:b541c5178e0b22e5,8:79541816,ac76e0a7355e68a49777b54faa4d3aff740733bdfde7b978afce05e3deccea1b" />
                    </button>
                

            edited <span title='2025-03-28 16:25:17Z' class='relativetime'>Mar 28 at 16:25</span>
        
        
        Tim Williams
        
            <span class="reputation-score" title="reputation score 167,256" dir="ltr">167k</span><span title="8 gold badges" aria-hidden="true"><span class="badge1"></span><span class="badgecount">8</span></span><span class="v-visible-sr">8 gold badges</span><span title="100 silver badges" aria-hidden="true"><span class="badge2"></span><span class="badgecount">100</span></span><span class="v-visible-sr">100 silver badges</span><span title="141 bronze badges" aria-hidden="true"><span class="badge3"></span><span class="badgecount">141</span></span><span class="v-visible-sr">141 bronze badges</span>
        
    
            asked <span title='2025-03-28 15:27:27Z' class='relativetime'>Mar 28 at 15:27</span>
        
        
        Matt Williamson<span class="d-none" itemprop="name">Matt Williamson</span>
        
            <span class="reputation-score" title="reputation score " dir="ltr">7,119</span><span title="1 gold badge" aria-hidden="true"><span class="badge1"></span><span class="badgecount">1</span></span><span class="v-visible-sr">1 gold badge</span><span title="25 silver badges" aria-hidden="true"><span class="badge2"></span><span class="badgecount">25</span></span><span class="v-visible-sr">25 silver badges</span><span title="38 bronze badges" aria-hidden="true"><span class="badge3"></span><span class="badgecount">38</span></span><span class="v-visible-sr">38 bronze badges</span>
        
    
            <span class="d-none" itemprop="commentCount">4</span> 
    
        
            <ul class="comments-list js-comments-list"
                    data-remaining-comments-count="0"
                    data-canpost="false"
                    data-cansee="true"
                    data-comments-unavailable="false"
                    data-addlink-disabled="true">

                        <li id="comment-140274600" class="comment js-comment " data-comment-id="140274600" data-comment-owner-id="7599798" data-comment-score="0">
        
            
                <span class="comment-copy">Would be good if you could limit the code you show to only the relevant parts - no one needs to see how you select the folder, how you open word, what you do with &quot;invalid files&quot; or the code of the error handler. Makes it really hard to find the right piece of code. Anyhow, as no one has the PDFs you have, I have no clue how we could help. Have you debugged  the code? Does it find any range with Arial 12 at all?</span>
                
                
&ndash;&nbsp;FunThomas
                
                <span class="comment-date" dir="ltr">
                    <span class="v-visible-sr">Commented</span>
                    <span title='2025-03-28 16:22:19Z, License: CC BY-SA 4.0' class='relativetime-clean'>Mar 28 at 16:22</span>
                </span>
            
        
    </li>
    <li id="comment-140274676" class="comment js-comment " data-comment-id="140274676" data-comment-owner-id="478884" data-comment-score="0">
        
            
                <span class="comment-copy">You need to step through your code using the debugger and see where the execution goes.  <code>extractedText = Trim(Mid(rng.Text, InStr(rng.Text, &quot;:&quot;) + 1))</code>  The paragraph ends with &quot;:&quot; though, so what text are you trying to get here?  You&#39;re really interested in the text <i>after</i> the header paragraph.  It would be useful if you could share an example Word doc (suitably redacted) for folks to test with.</span>
                
                
&ndash;&nbsp;Tim Williams
                
                <span class="comment-date" dir="ltr">
                    <span class="v-visible-sr">Commented</span>
                    <span title='2025-03-28 16:38:26Z, License: CC BY-SA 4.0' class='relativetime-clean'>Mar 28 at 16:38</span>
                </span>
                        <span title="this comment was edited 3 times">
                            
                        </span>
            
        
    </li>
    <li id="comment-140274788" class="comment js-comment " data-comment-id="140274788" data-comment-owner-id="5211752" data-comment-score="1">
        
            
                    <span title="number of &#x27;useful comment&#x27; votes received"
                            class="cool">1</span>
            
        
                <span class="comment-copy">Instead of crawling through the paragraphs use the <code>Find</code> function to go straight to the heading. Search for the style Heading 1 as you have stated that is what is used. You can then use a predefined bookmark to return all the text for that heading.</span>
                
                
&ndash;&nbsp;Timothy Rylatt
                
                <span class="comment-date" dir="ltr">
                    <span class="v-visible-sr">Commented</span>
                    <span title='2025-03-28 17:04:37Z, License: CC BY-SA 4.0' class='relativetime-clean'>Mar 28 at 17:04</span>
                </span>
            
        
    </li>
    <li id="comment-140280043" class="comment js-comment " data-comment-id="140280043" data-comment-owner-id="2282445" data-comment-score="0">
        
            
                <span class="comment-copy">@K J Thanks, that helps to sort things a bit.</span>
                
                
&ndash;&nbsp;Matt Williamson
                
                <span class="comment-date" dir="ltr">
                    <span class="v-visible-sr">Commented</span>
                    <span title='2025-03-30 18:46:17Z, License: CC BY-SA 4.0' class='relativetime-clean'>Mar 30 at 18:46</span>
                </span>
            
        
    </li>

            </ul>
	    

                        Add a comment
                    <span class="js-link-separator dno">&nbsp;|&nbsp;</span>
            
                 
                                <h2 class="mb0" data-answercount="1">
                                        1 Answer
                                    <span style="display:none;" itemprop="answerCount">1</span>
                                </h2>
                            
                            
        <label class="flex--item fs-caption" for="answer-sort-dropdown-select-menu">
            Sorted by:
        </label>
        
            Reset to default
        
    
        <input type="hidden" id="voteUpHash" value="70:3:31e,16:272a506f86d43dd9,10:1744028632,16:c8b892c3ba1be86e,8:79542642,8580946d36b4704fae683d55795a2852971821a417b92f0942bb290893f905e8" />
        
1        
        <button
                class="js-vote-down-btn flex--item mb8 s-btn s-btn__muted s-btn__outlined bar-pill bc-black-225 f:bc-theme-secondary-400 f:bg-theme-secondary-400 f:fc-black-050 h:bg-theme-primary-200"
				id=downvote-btn-79542642
                title="This answer is not useful"
                aria-pressed="false"
                aria-label="Down vote"
                data-selected-classes="fc-theme-primary bc-theme-primary bg-theme-primary-100"
                data-unselected-classes="bc-black-225 f:bc-theme-secondary-400 f:bg-theme-secondary-400 f:fc-black-050 h:bg-theme-primary-200">
            
        </button>
        <input type="hidden" id="voteDownHash" value="70:3:31e,16:38a658e8e14842b7,10:1744028632,16:de0041fb0e2037fc,8:79542642,bd4b95caba635690d96bb6bbb4a0264b06f9a29b11c26b2a0472b3f83f0bb1e3" />


<p>If the converted content really does include content in the Heading1 Style, all you need is some simple Word VBA code like:</p>
<pre><code>Sub GetHeadingSpanText()
' Finds each run of Heading 1 styled content in the active document and shows,
' in a message box, the text belonging to that heading without the heading
' paragraph itself.
Dim RngHd As Range, strOut As String
With ActiveDocument.Range
  ' Configure a format-only search (empty search text) for the Heading 1 style,
  ' running forward and stopping at the end of the range.
  With .Find
    .ClearFormatting
    .Replacement.ClearFormatting
    .Style = wdStyleHeading1
    .Replacement.Text = &quot;&quot;
    .Wrap = wdFindStop
    .Forward = True
    .Format = True
    .Text = &quot;&quot;
  End With
  ' Each successful Execute repositions the With range on the next match.
  Do While .Find.Execute
    Set RngHd = .Paragraphs(1).Range
    ' Expand to the span of the predefined \HeadingLevel bookmark.
    ' NOTE(review): presumably the heading plus its subordinate content - confirm
    ' against the Word predefined bookmark documentation.
    Set RngHd = RngHd.GoTo(What:=wdGoToBookmark, Name:=&quot;\HeadingLevel&quot;)
    ' Move the start past the first (heading) paragraph, then show what is left.
    RngHd.Start = RngHd.Paragraphs.First.Range.End: strOut = RngHd.Text: MsgBox strOut
    ' Collapse to the end of the match so the next Execute searches onward.
    .Collapse wdCollapseEnd
  Loop
End With
Set RngHd = Nothing
End Sub
</code></pre>
<p>If your 12pt Arial Bold content isn't actually a Heading Style, all you need is:</p>
<pre><code>Sub GetHeadingSpanText()
' Variant for documents where the headings are only directly formatted
' (bold Arial 12) and do not carry a heading style.
' Step 1: replace-all applies the Heading 1 style to every bold Arial 12 run.
' Step 2: finds each Heading 1 run and shows, in a message box, the text
'         belonging to that heading without the heading paragraph itself.
' Note that step 1 modifies the active document.
Dim RngHd As Range, strOut As String
With ActiveDocument.Range
  With .Find
    .ClearFormatting
    .Replacement.ClearFormatting
    .Replacement.Text = &quot;&quot;
    .Text = &quot;&quot;
    .Wrap = wdFindContinue
    .Forward = True
    .Format = True
    ' Search criteria: bold Arial 12. Bold is an on/off setting, so it is set
    ' to True (it was previously assigned the point size by mistake).
    With .Font
      .Name = &quot;Arial&quot;
      .Size = 12
      .Bold = True
    End With
    ' Restyle every match as Heading 1
    .Replacement.Style = wdStyleHeading1
    .Execute Replace:=wdReplaceAll
    ' Reconfigure the same Find object to look for the Heading 1 style itself,
    ' this time stopping at the end of the range
    .ClearFormatting
    .Replacement.ClearFormatting
    .Style = wdStyleHeading1
    .Wrap = wdFindStop
  End With
  ' Each successful Execute repositions the With range on the next match.
  Do While .Find.Execute
    Set RngHd = .Paragraphs(1).Range
    ' Expand to the span of the predefined \HeadingLevel bookmark.
    ' NOTE(review): presumably the heading plus its subordinate content - confirm
    ' against the Word predefined bookmark documentation.
    Set RngHd = RngHd.GoTo(What:=wdGoToBookmark, Name:=&quot;\HeadingLevel&quot;)
    ' Move the start past the first (heading) paragraph, then show what is left.
    RngHd.Start = RngHd.Paragraphs.First.Range.End: strOut = RngHd.Text: MsgBox strOut
    ' Collapse to the end of the match so the next Execute searches onward.
    .Collapse wdCollapseEnd
  Loop
End With
Set RngHd = Nothing
End Sub
</code></pre>
<p>I'll leave you to incorporate the Word code into your existing process.</p></article><footer class="article-footer"><div class="article-tags">
                    继续浏览有关
                    <a href="/tag/988760.html" rel="tag">vbaOpen PDF39s as Word docs and add sections as cells in excelStack Overflow</a>                    的文章
                </div><div class="share"><h5>分享到</h5><div class="bdsharebuttonbox"><a class="bds_weixin" rel="nofollow" data-cmd="weixin"></a><a class="bds_tsina" rel="nofollow" data-cmd="tsina"></a><a class="bds_sqq" rel="nofollow" data-cmd="sqq"></a><a class="bds_qzone" rel="nofollow" data-cmd="qzone"></a><a class="bds_youdao" rel="nofollow" data-cmd="youdao"></a><a class="bds_baidu" rel="nofollow" data-cmd="baidu"></a><a class="bds_douban" rel="nofollow" data-cmd="douban"></a><a class="bds_bdhome" rel="nofollow" data-cmd="bdhome"></a><a class="bds_fbook" rel="nofollow" data-cmd="fbook"></a><a class="bds_twi" rel="nofollow" data-cmd="twi"></a><a class="bds_more" rel="nofollow" data-cmd="more"></a><a class="bds_count" rel="nofollow" data-cmd="count"></a></div><p>
                                转转请注明出处：<span>http://www.815494.com/programmer/1744028635a2333477.html</span></p></div></footer><div class="relates"><h3>与本文相关的文章</h3><ul><li><a href="/programmer/1744028635a2333477.html">vba - Open PDF&amp;#39;s as Word docs and add sections as cells in excel - Stack Overflow</a></li></ul></div><div id="respond" class="no_webshot"><form action="/comment/create/2333477.html?safe_token=f7IzvaWjy16pFRMy8zcNsAiO1CmvSP8aAgmIrvpdynN3wxJzuC_2F_2FwffipDLydb1XFeQPpmhaYSGEGRdOZADnvw_3D_3D" method="post" id="commentform"><div class="comt-title"><span style="font-size: 16px;">发布评论</span></div><div class="comt"><div class="comt-box"><textarea placeholder="写点什么..." class="input-block-level comt-area" name="message" id="comment" cols="100%" rows="3" tabindex="1" onkeydown="if(event.ctrlKey&amp;&amp;event.keyCode==13){document.getElementById('submit').click();return false};"></textarea><div class="comt-ctrl"><button class="btn btn-primary pull-right" type="submit" name="submit" tabindex="5"><i class="icon-ok-circle icon-white icon12"></i>提交评论
</button></div></div></div></form></div><div id="postcomments"><h3 id="comments">
        评论列表<b>(0)</b></h3><ol class="commentlist"><li class="sn-empty">暂无评论</li></ol></div>
</div></div><aside class="sidebar"><div class="widget d_postlist"><h3 class="widget_tit">热门推荐</h3><ul><li><a href="/programmer/1743922222a2317273.html"><span class="thumbnail"><img src="/view/img/nopic.png" alt="maven - Importing lombok and its annotation processor from custom dependency - Stack Overflow"/></span><span class="text">maven - Importing lombok and its annotation processor from custom dependency - Stack Overflow</span><span class="muted">1天前</span><span class="muted">0评论</span></a></li><li><a href="/programmer/1743927729a2318243.html"><span class="thumbnail"><img src="/view/img/nopic.png" alt="Dimensional Modeling- How to handle Code-Name-Description lookup table - Stack Overflow"/></span><span class="text">Dimensional Modeling- How to handle Code-Name-Description lookup table - Stack Overflow</span><span class="muted">1天前</span><span class="muted">0评论</span></a></li><li><a href="/programmer/1743942563a2320801.html"><span class="thumbnail"><img src="/view/img/nopic.png" alt="servicenow - JavaScript Check length of String and if it&amp;#39;s longer than 4, save variable in different variable - Stack Ov"/></span><span class="text">servicenow - JavaScript Check length of String and if it&amp;#39;s longer than 4, save variable in different variable - Stack Ov</span><span class="muted">1天前</span><span class="muted">0评论</span></a></li><li><a href="/programmer/1743949158a2321936.html"><span class="thumbnail"><img src="/view/img/nopic.png" alt="python - FreeCodeCamp Scrapy Beginners Course Part 8: Fake Headers &amp;amp; User Agents - Stack Overflow"/></span><span class="text">python - FreeCodeCamp Scrapy Beginners Course Part 8: Fake Headers &amp;amp; User Agents - Stack Overflow</span><span class="muted">1天前</span><span class="muted">0评论</span></a></li><li><a href="/programmer/1743953638a2322714.html"><span class="thumbnail"><img src="/view/img/nopic.png" alt="material ui - MUI Resizable Textfield Overflow CSS Issue - Stack Overflow"/></span><span 
class="text">material ui - MUI Resizable Textfield Overflow CSS Issue - Stack Overflow</span><span class="muted">1天前</span><span class="muted">0评论</span></a></li><li><a href="/programmer/1743986464a2326331.html"><span class="thumbnail"><img src="/view/img/nopic.png" alt="html - How to resolve thymeleaf fragment(th:replace) is not working with spring boot? - Stack Overflow"/></span><span class="text">html - How to resolve thymeleaf fragment(th:replace) is not working with spring boot? - Stack Overflow</span><span class="muted">15小时前</span><span class="muted">0评论</span></a></li><li><a href="/programmer/1743987071a2326432.html"><span class="thumbnail"><img src="/view/img/nopic.png" alt="javascript - Find a missing number in a sequence using .reduce - Stack Overflow"/></span><span class="text">javascript - Find a missing number in a sequence using .reduce - Stack Overflow</span><span class="muted">15小时前</span><span class="muted">0评论</span></a></li><li><a href="/programmer/1744028752a2333498.html"><span class="thumbnail"><img src="/view/img/nopic.png" alt="powerbi - Power Automate running query against Power BI data set error - Stack Overflow"/></span><span class="text">powerbi - Power Automate running query against Power BI data set error - Stack Overflow</span><span class="muted">3小时前</span><span class="muted">0评论</span></a></li></ul></div><div class="widget d_tag"><h3 class="widget_tit">热门标签</h3><div class="d_tags"><a href="/tag/989395.html">javascriptAdd key value (object)(3)</a><a href="/tag/989385.html">syntaxWhat is the keyword argume(3)</a><a href="/tag/989368.html">javascriptrouterlink to path wit(3)</a><a href="/tag/989364.html">000 small files(3)</a><a href="/tag/989327.html">hyperparametersHyperopt attribut(3)</a><a href="/tag/989256.html">javascriptHow to Insert records(3)</a><a href="/tag/989243.html">powerbiJoin DimDate to Fact wher(3)</a><a href="/tag/989231.html">javascriptRender different compo(3)</a><a href="/tag/989218.html">javascriptReferenceError 
Client(3)</a><a href="/tag/989212.html">groovyUsing methods from shared(3)</a><a href="/tag/989197.html">javascriptMake alert window when(3)</a><a href="/tag/989174.html">javascriptDon39t pass object by(3)</a><a href="/tag/989161.html">javascriptCount occurrences in a(3)</a><a href="/tag/989143.html">javascriptRunning code only afte(3)</a><a href="/tag/989133.html">Powershell script to delete dupl(3)</a><a href="/tag/989128.html">Two way hashing JSON String in J(3)</a><a href="/tag/989022.html">javascriptDropdown component for(3)</a><a href="/tag/988937.html">javascriptHow can I call a funct(3)</a><a href="/tag/988929.html">Checkbox validation for checking(3)</a><a href="/tag/988924.html">javascriptselect web camera on g(3)</a></div></div><div class="widget widget_recent_entries"><h3 class="widget_tit">近期文章</h3><ul><li><a href="/operation/1744042957a2335981.html">macOs｜校园网弹不出认证</a></li><li><a href="/operation/1744042840a2335959.html">计算机显示不出来验证码,如何解决网页图片红叉显示不出来验证码图片没显示的电脑故障...</a></li><li><a href="/operation/1744042496a2335903.html">Windows 重新安装自带计算器</a></li><li><a href="/operation/1744042476a2335899.html">WinCC中通过脚本禁用或启用Windows快捷键</a></li><li><a href="/operation/1744042379a2335882.html">Windows 7错误恢复一直循环怎么办？</a></li><li><a href="/seo/1701335978a994070.html">networkx从路径到顶点的最短路径</a></li><li><a href="/seo/1701335961a994068.html">igraph R中从根到叶的有向树图中的所有路径</a></li><li><a href="/seo/1701335941a994066.html">找到最短路径小于或等于Python中非循环有向图的给定值</a></li><li><a href="/seo/1701335917a994064.html">到达Java中目标的最短路径</a></li><li><a href="/seo/1701335884a994062.html">排序BST使用常量内存为O（n）</a></li><li><a href="/web/1734010175a1543806.html">大学生常用网站链接汇总</a></li><li><a href="/web/1734009990a1543760.html">网络推广必知的4种常用短网址</a></li><li><a href="/web/1734009977a1543756.html">常见的门户网站</a></li><li><a href="/web/1734009962a1543754.html">一步步配置腾讯云服务器Ubuntu 通过域名访问自己的网页tomcat（详细基础）</a></li><li><a href="/web/1734009943a1543748.html">亲测 https 抓包网页和app_另外弱网络制作</a></li><li><a 
href="/html/1692198260a106499.html">程序员在平台兼职接单，月入30K，方法我全写出来了！（附接单渠道和注意事项）</a></li><li><a href="/html/1692198229a106495.html">Apache Kafka - 流式处理</a></li><li><a href="/html/1692198194a106491.html">python程序文件扩展名主要是什么_python程序文件扩展名知识点详解</a></li><li><a href="/html/1692198163a106488.html">【网页设计大作业】：端午主题（HTML+CSS+JavaScript）——中国传统文化（6页）</a></li><li><a href="/html/1692198121a106483.html">C++跑酷小游戏</a></li><li><a href="/programmer/1744043032a2335995.html">javascript - @onmouseup not firing at vuejs 2 - Stack Overflow</a></li><li><a href="/programmer/1744043002a2335989.html">javascript - Add key value (object) pair to all objects in array - Stack Overflow</a></li><li><a href="/programmer/1744042981a2335986.html">about javascript onclick foreach function - Stack Overflow</a></li><li><a href="/programmer/1744042940a2335979.html">wordpress - Any helper functions in &amp;quot;WP Offload Media&amp;quot; to transfer or save files to S3 without creating an att</a></li><li><a href="/programmer/1744042923a2335975.html">javascript - Highcharts categories from data array - Stack Overflow</a></li></ul></div></aside></section><footer class="footer"><div class="footer-inner"><div class="copyright pull-left"><p>
            CopyRight © 2022 All Rights Reserved. Powered by <a href="/">科技改变生活-雨落星辰 - 所有的伟大,都源于一个勇敢的开始</a><a target="_blank" rel="nofollow" href="https://beian.miit.gov.cn/" style="font-size: 12px;">豫ICP备2022005478号-23</a></p></div></div></footer><script>
// Keep an existing global _hmt if one is already defined, otherwise start with
// an empty array.
var _hmt = _hmt || [];
// Inject the hm.js script from hm.baidu.com ahead of the first <script> element
// on the page.
(function () {
  var firstScript;
  var loader = document.createElement("script");
  loader.src = "https://hm.baidu.com/hm.js?6e6f0c58c8f44e7a06c9d4ce43a6da98";
  firstScript = document.getElementsByTagName("script")[0];
  firstScript.parentNode.insertBefore(loader, firstScript);
})();
</script><script src="/view/template/d8/js/jquery.js?2.3.0"></script></body></html>