最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

html - OpenXML: Table of Contents hyperlinks in Word detected as external links instead of internal ones - Stack Overflow

programmeradmin1浏览0评论

I'm trying to convert HTML content to Word using OpenXML. The issue I'm facing is that the hyperlinks in the table of contents are detected by Word as "Existing File or Web Page" links:

They should instead be links to locations within the same document.

The most curious thing is that the hyperlink address is correct, meaning it has the correct identifier for the document location it should navigate to. The only issue is that Word interprets it as an external link rather than an internal one.

If I select "Edit Hyperlink" in a document where the table of contents is correctly formatted, I see this:

It directly opens the "Place in This Document" section and correctly points to where it should.

However, if I do the same in a document where the table of contents is NOT correctly formatted, I see this:

It opens the "File or external web site" section, because Word treats the hyperlink as pointing to a location outside the document.

This is how I'm performing the conversion:

/// <summary>
/// Builds a Word document (or template) from the HTML held in <c>DocumentContent</c>
/// and returns the stream the package was written to.
/// </summary>
/// <remarks>
/// The HTML body is not translated into WordprocessingML here; it is stored as an XHTML
/// alternative-format import part and referenced from the document body by an
/// <c>AltChunk</c> element. An optional cover page and an optional header/footer pair
/// are added afterwards.
/// NOTE(review): the returned stream's Position is not reset before returning —
/// confirm that callers rewind it (or use ToArray) before reading.
/// </remarks>
/// <returns>The <see cref="MemoryStream"/> that contains the generated package.</returns>
public override System.IO.Stream Convert()
{
    var memoryStream = new MemoryStream();

    // Pre-process header/footer markup only when present.
    // NOTE(review): Section.Remove is defined elsewhere; presumably it strips section markup
    // that is not wanted for the Word format — confirm.
    if (!string.IsNullOrEmpty(DocumentContent.Header))
        DocumentContent.Header = Section.Remove(Document.Converter.Format.Word, DocumentContent.Header);
    if (!string.IsNullOrEmpty(DocumentContent.Footer))
        DocumentContent.Footer = Section.Remove(Document.Converter.Format.Word, DocumentContent.Footer);

    string html = DocumentContent.Body;
    html = FixOptionTagBadFormatter(html);
    const string altChunkID = "AltChunkId1";

    // The settings decide whether the package is created as a template or as a regular document.
    WordprocessingDocumentType documentType = ((WordSettings)Settings).IsTemplate ? WordprocessingDocumentType.Template : WordprocessingDocumentType.Document;

    using (var myDoc = WordprocessingDocument.Create(memoryStream, documentType))
    {
        MainDocumentPart mainPart = myDoc.MainDocumentPart;

        if (mainPart == null)
        {
            // Create the main part with an empty body to host the chunks.
            mainPart = myDoc.AddMainDocumentPart();
            new DocumentFormat.OpenXml.Wordprocessing.Document(new List<OpenXmlElement> { new Body() }).Save(mainPart);
        }

        AlternativeFormatImportPart chunk = mainPart.AddAlternativeFormatImportPart(AlternativeFormatImportPartType.Xhtml, altChunkID);

        // Resolve the encoding before opening the part stream, and keep the stream in a
        // using block (as AddCoverPage does) so it is released even if the encoding name
        // turns out to be invalid.
        string encoding = GetEncodingDocument(html);
        using (System.IO.Stream chunkStream = chunk.GetStream(FileMode.Create, FileAccess.Write))
        using (var stringStream = new StreamWriter(chunkStream, Encoding.GetEncoding(encoding)))
        {
            stringStream.Write(html);
        }

        // Reference the imported HTML from the first position of the body.
        var altChunk = new AltChunk { Id = altChunkID };
        mainPart.Document.Body.InsertAt(altChunk, 0);

        if (DocumentContent.CoverPage != null)
        {
            // AddCoverPage also inserts its chunk at index 0, so the cover page ends up
            // ahead of the main HTML chunk.
            AddCoverPage(mainPart, DocumentContent.CoverPage, encoding);
        }

        SectionProperties sectionProperties = new SectionProperties();
        mainPart.Document.Body.Append(sectionProperties);

        // Header and footer are only wired up when both are supplied.
        if (DocumentContent.Footer != null && DocumentContent.Header != null)
        {
            HeaderReference headerReference = new HeaderReference { Type = HeaderFooterValues.Default, Id = "rId1" };
            sectionProperties.Append(headerReference);

            FooterReference footerReference = new FooterReference() { Type = HeaderFooterValues.Default, Id = "rId2" };
            sectionProperties.Append(footerReference);

            // Create empty header and footer parts under the same relationship ids
            // that the references above point to.
            HeaderPart headerPart = mainPart.AddNewPart<HeaderPart>("rId1");
            headerPart.Header = new Header();

            FooterPart footerPart = mainPart.AddNewPart<FooterPart>("rId2");
            footerPart.Footer = new Footer();

            string htmlHeader = DocumentContent.Header;
            string htmlFooter = DocumentContent.Footer;

            // NOTE(review): ChangeHeaderAndFooter is defined elsewhere; presumably it fills the
            // empty parts with the given HTML — confirm.
            ChangeHeaderAndFooter(myDoc, headerReference.Id, footerReference.Id, htmlHeader, htmlFooter);
        }

        AddPageMargin(mainPart);

        mainPart.Document.Save();

        return memoryStream;
    }
}

/// <summary>
/// Reads the character-encoding name declared by the first
/// <c>&lt;meta charset="..."&gt;</c> tag in the given HTML.
/// </summary>
/// <param name="html">HTML markup to inspect; may be null or empty.</param>
/// <returns>
/// The double-quoted value of the first <c>meta charset</c> attribute, or <c>"utf-8"</c>
/// when the markup is null or empty, contains no such tag, the attribute has no closing
/// quote, or the declared value is blank.
/// </returns>
private string GetEncodingDocument(string html)
{
    const string defaultEncoding = "utf-8";
    const string charsetMarker = "<meta charset=\"";

    if (string.IsNullOrEmpty(html))
    {
        return defaultEncoding;
    }

    // HTML tag and attribute names are case-insensitive, so <META CHARSET="..."> must match too.
    int markerIndex = html.IndexOf(charsetMarker, StringComparison.OrdinalIgnoreCase);
    if (markerIndex < 0)
    {
        return defaultEncoding;
    }

    int valueStart = markerIndex + charsetMarker.Length;
    int valueEnd = html.IndexOf('"', valueStart);
    if (valueEnd < 0)
    {
        // Unterminated attribute: fall back instead of calling Substring with a negative length.
        return defaultEncoding;
    }

    string declared = html.Substring(valueStart, valueEnd - valueStart);

    // A blank name would only fail later when it is resolved to an Encoding.
    return string.IsNullOrWhiteSpace(declared) ? defaultEncoding : declared;
}

/// <summary>
/// Embeds the cover page's HTML as an XHTML alternative-format import part and places an
/// <c>AltChunk</c> that references it at the very start of the document body.
/// </summary>
/// <param name="mainPart">Main document part that receives the import part and the chunk reference.</param>
/// <param name="CoverPage">Cover page whose <c>Body</c> markup is written into the import part.</param>
/// <param name="encoding">Name of the text encoding used to write the markup.</param>
private void AddCoverPage(MainDocumentPart mainPart, CoverPage CoverPage, string encoding)
{
    const string chunkId = "CoverPageAltChunkId1";
    string markup = CoverPage.Body;

    AlternativeFormatImportPart importPart =
        mainPart.AddAlternativeFormatImportPart(AlternativeFormatImportPartType.Xhtml, chunkId);

    // Both the part stream and the writer are released as soon as the markup is written.
    using (System.IO.Stream partStream = importPart.GetStream(FileMode.Create, FileAccess.Write))
    using (var writer = new StreamWriter(partStream, Encoding.GetEncoding(encoding)))
    {
        writer.Write(markup);
    }

    // Index 0 puts the cover page ahead of anything already in the body.
    mainPart.Document.Body.InsertAt(new AltChunk { Id = chunkId }, 0);
}

The rest of the code is related to the document's header and footer, so I don't think it's relevant to the issue.

Is there a way to force the conversion so that the hyperlinks in the table of contents are recognized as internal links instead of external ones?

与本文相关的文章

发布评论

评论列表(0)

  1. 暂无评论