最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

jquery - javascript: read plain html-string and using DOMparser change links path - Stack Overflow

programmeradmin6浏览0评论

In my angular app using one of WYSIWYG i can insert links without protocol. And this is bad:

i need to parse string and change all link's (if thay didn't have protocol to http://...)

and i try to do so:

var content = '<p>7</p><p>77</p><p><br></p><p><a href="" rel="nofollow">;/a></p><p><br></p><p><a href="example" target="_blank">example</a></p><p><br></p><p><a href="ftp://localhost">ftp://localhost</a></p><p><br></p><p><a href="localhost">localhost</a><br></p>';

var addProtocolToLinks = function(URL){
    var protocols = ['http', 'https', 'ftp', 'sftp', 'ssh', 'smtp'];
    var withProtocol = false;
    if (URL.length > 0){
      protocols.forEach(function(el) {
        if (URL.slice(0,4).indexOf(el) > -1){
          withProtocol = true;
        }
      });
      var newURL =  URL;
      if (!withProtocol){
        newURL = 'http://' + URL;
      }
      console.log(newURL + '   ' + URL);
      return newURL;
    }
};

var parser = new DOMParser();
var doc = parser.parseFromString(content, "text/html");
var links = doc.getElementsByTagName("a");
for(var i=0; i<links.length; i++) {
    links[i].setAttribute('href', addProtocolToLinks(links[i].href));
    console.log('result: ' + links[i].getAttribute('href'));
}

console.log('result html: ');
console.log(doc);  // also i need to fetch only my var content part, without html, body etc

/

But for some reasons it's not working properly. What i do wrong?

In my angular app using one of WYSIWYG i can insert links without protocol. And this is bad:

i need to parse string and change all link's (if thay didn't have protocol to http://...)

and i try to do so:

var content = '<p>7</p><p>77</p><p><br></p><p><a href="http://example." rel="nofollow">http://example.</a></p><p><br></p><p><a href="example." target="_blank">example.</a></p><p><br></p><p><a href="ftp://localhost">ftp://localhost</a></p><p><br></p><p><a href="localhost">localhost</a><br></p>';

var addProtocolToLinks = function(URL){
    var protocols = ['http', 'https', 'ftp', 'sftp', 'ssh', 'smtp'];
    var withProtocol = false;
    if (URL.length > 0){
      protocols.forEach(function(el) {
        if (URL.slice(0,4).indexOf(el) > -1){
          withProtocol = true;
        }
      });
      var newURL =  URL;
      if (!withProtocol){
        newURL = 'http://' + URL;
      }
      console.log(newURL + '   ' + URL);
      return newURL;
    }
};

var parser = new DOMParser();
var doc = parser.parseFromString(content, "text/html");
var links = doc.getElementsByTagName("a");
for(var i=0; i<links.length; i++) {
    links[i].setAttribute('href', addProtocolToLinks(links[i].href));
    console.log('result: ' + links[i].getAttribute('href'));
}

console.log('result html: ');
console.log(doc);  // also i need to fetch only my var content part, without html, body etc

http://jsfiddle/r3dgeo23/

But for some reasons it's not working properly. What i do wrong?

Share Improve this question edited Aug 30, 2015 at 14:35 byCoder asked Jul 21, 2015 at 11:33 byCoderbyCoder 9,18427 gold badges119 silver badges259 bronze badges
Add a ment  | 

5 Answers 5

Reset to default 4 +25

you had almost everything right except that:

link[i].href

returns undefined if no protocol set. Therefore you gave you function addProtocolToLinks(undefined) and it did not work.

You can use:

getAttribute('href');

to make it work, see this fiddle: http://jsfiddle/r3dgeo23/3/

/////EDIT

Here is a fiddle for only fetching the content part and not the whole html: http://jsfiddle/r3dgeo23/5/

/////EDIT2

Create the container with unique id within your function:

var container = document.createElement('div');
container.setAttribute("id", "content");
container.innerHTML = content;

http://jsfiddle/r3dgeo23/6/

If I pletely understood your question, this should work...

    function jsF_addHTTP( url )
    {

        if (url !== "") 
        {
            // Insert HTTP if it doesn't exist.

            if ( !url.match("^(http|https|ftp|sftp|ssh|smtp)://") ) 
            {
                url = "http://" + url;
            }
        }
        return url;
    }

Try this.. It is WORKING

var addProtocolToLinks = function(URL){
protocols = ['http', 'https', 'ftp', 'sftp', 'ssh', 'smtp'];
protocols.forEach(function(item) {
    if(url.indexOf(item) != -1) {
    newUrl = "http://"+url.substr(url.indexOf("//")+2);
   }    
});
return newUrl;
}

sample demo is here http://jsfiddle/d9p9534h/

Let me know if it worked

How about this?

function ensureProtocol(href) {
    var match = href.match(/^((\w+)\:)?(.*)/);
    var protocol = match[1] || 'https:';
    return protocol + match[3];
}

NOTE: Not every URI has an authority part. That's why the regular expression does not include //. See this article

function Protocol( url )
    {

        if (url !== "") 
        {


            if ( !url.match("^(http|https|ftp|sftp|ssh|smtp)://") ) 
            {
                url = "http://" + url;
            }
        }
        return url;
    }
发布评论

评论列表(0)

  1. 暂无评论