最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

image processing - Extract IPTC information from JPEG using Javascript - Stack Overflow

programmeradmin3浏览0评论

I'm trying to extract IPTC photo caption information from a JPEG file using Javascript. (I know I can do this server-side, but I'm looking specifically for a Javascript solution.)

I found this script, which extracts EXIF information ... but I'm not sure how to adapt it to grab IPTC data.

Are there any existing scripts that offer such functionality? If not, how would you modify the EXIF script to also parse IPTC data?

UPDATE

I've modified the EXIF script I linked above. It sorta does what I want, but it's not grabbing the right data 100 percent of the time.

After line 401, I added:

else if (iMarker == 237) {
        // 0xED = Application-specific 13 (Photoshop IPTC)                
        if (bDebug) log("Found 0xFFED marker");   
        return readIPTCData(oFile, iOffset + 4, getShortAt(oFile, iOffset+2, true)-2);                       
}

And then elsewhere in the script, I added this function:

function readIPTCData(oFile, iStart, iLength) {
    exif = new Array();

if (getStringAt(oFile, iStart, 9) != "Photoshop") {
    if (bDebug) log("Not valid Photoshop data! " + getStringAt(oFile, iStart, 9));
    return false;
}

var output = '';
var count = 0;
two = new Array();
for (i=0; i<iLength; i++) {
   if (getByteAt(oFile, iStart + i) == 2 && getByteAt(oFile, iStart + i + 1) == 120) {
      var caption = getString2At(oFile, iStart + i + 2, 800);
   }
   if (getByteAt(oFile, iStart + i) == 2 && getByteAt(oFile, iStart + i + 1) == 80) {
      var credit = getString2At(oFile, iStart + i + 2, 300);
   }       
}

exif['ImageDescription'] = caption;
exif['Artist'] = credit;

return exif;

}

So let me now modify my question slightly. How can the function above be improved?

I'm trying to extract IPTC photo caption information from a JPEG file using Javascript. (I know I can do this server-side, but I'm looking specifically for a Javascript solution.)

I found this script, which extracts EXIF information ... but I'm not sure how to adapt it to grab IPTC data.

Are there any existing scripts that offer such functionality? If not, how would you modify the EXIF script to also parse IPTC data?

UPDATE

I've modified the EXIF script I linked above. It sorta does what I want, but it's not grabbing the right data 100 percent of the time.

After line 401, I added:

else if (iMarker == 237) {
        // 0xED = Application-specific 13 (Photoshop IPTC)                
        if (bDebug) log("Found 0xFFED marker");   
        return readIPTCData(oFile, iOffset + 4, getShortAt(oFile, iOffset+2, true)-2);                       
}

And then elsewhere in the script, I added this function:

function readIPTCData(oFile, iStart, iLength) {
    exif = new Array();

if (getStringAt(oFile, iStart, 9) != "Photoshop") {
    if (bDebug) log("Not valid Photoshop data! " + getStringAt(oFile, iStart, 9));
    return false;
}

var output = '';
var count = 0;
two = new Array();
for (i=0; i<iLength; i++) {
   if (getByteAt(oFile, iStart + i) == 2 && getByteAt(oFile, iStart + i + 1) == 120) {
      var caption = getString2At(oFile, iStart + i + 2, 800);
   }
   if (getByteAt(oFile, iStart + i) == 2 && getByteAt(oFile, iStart + i + 1) == 80) {
      var credit = getString2At(oFile, iStart + i + 2, 300);
   }       
}

exif['ImageDescription'] = caption;
exif['Artist'] = credit;

return exif;

}

So let me now modify my question slightly. How can the function above be improved?

Share Improve this question edited Apr 29, 2011 at 15:24 jawns317 asked Apr 29, 2011 at 13:59 jawns317jawns317 1,7462 gold badges17 silver badges26 bronze badges 4
  • This will only be possible in browsers that support the new-ish HTML5 file APIs. That script you linked appears to get image data by fetching it from the server, which is probably not what you're wanting to do. – Pointy Commented Apr 29, 2011 at 14:05
  • Yes, that's perfectly fine -- I'm developing this for internal use only, and we've all got the latest browsers, so it's not a problem. – jawns317 Commented Apr 29, 2011 at 14:08
  • This isn't working for me. Do you have a jsfiddle example? – Mark Robson Commented Jun 16, 2014 at 16:53
  • For anyone else who DOES need cross-browser support, jDataView is a nice way to work with binary data in JavaScript. – abettermap Commented Dec 2, 2014 at 1:09
Add a ment  | 

3 Answers 3

Reset to default 6

For what it's worth, I extrapolated on this a bit... I haven't done a whole lot of testing, but the few test images I have seem to work.

    var bDebug = false;

    var fieldMap = {
        120 : 'caption',
        110 : 'credit',
        25 : 'keywords',
        85 : 'byline',
        122 : 'captionWriter',
        105 : 'headline',
        116 : 'copyright',
        15 : 'category'
    };

    function readIPTCData(oFile, iStart, iLength) {
        var data = {};

        if (oFile.getStringAt(iStart, 9) != "Photoshop") {
            if (bDebug) log("Not valid Photoshop data! " + oFile.getStringAt(iStart, 9));
            return false;
        }

        var fileLength = oFile.getLength();

        var length, offset, fieldStart, title, value;
        var FILE_SEPARATOR_CHAR = 28,
            START_OF_TEXT_CHAR = 2;

        for (var i = 0; i < iLength; i++) {

            fieldStart = iStart + i;
            if(oFile.getByteAt(fieldStart) == START_OF_TEXT_CHAR && oFile.getByteAt(fieldStart + 1) in fieldMap) {
                length = 0;
                offset = 2;

                while(
                    fieldStart + offset < fileLength &&
                    oFile.getByteAt(fieldStart + offset) != FILE_SEPARATOR_CHAR &&
                    oFile.getByteAt(fieldStart + offset + 1) != START_OF_TEXT_CHAR) { offset++; length++; }

                if(!length) { continue; }

                title = fieldMap[oFile.getByteAt(fieldStart + 1)];
                value = oFile.getStringAt(iStart + i + 2, length) || '';
                value = value.replace('\000','').trim();

                data[title] = value;
                i+=length-1;
            }
        }

        return data;

    }

    function findIPTCinJPEG(oFile) {
        var aMarkers = [];

        if (oFile.getByteAt(0) != 0xFF || oFile.getByteAt(1) != 0xD8) {
            return false; // not a valid jpeg
        }

        var iOffset = 2;
        var iLength = oFile.getLength();
        while (iOffset < iLength) {
            if (oFile.getByteAt(iOffset) != 0xFF) {
                if (bDebug) console.log("Not a valid marker at offset " + iOffset + ", found: " + oFile.getByteAt(iOffset));
                return false; // not a valid marker, something is wrong
            }

            var iMarker = oFile.getByteAt(iOffset+1);

            if (iMarker == 237) {
                if (bDebug) console.log("Found 0xFFED marker");
                return readIPTCData(oFile, iOffset + 4, oFile.getShortAt(iOffset+2, true)-2);

            } else {
                iOffset += 2 + oFile.getShortAt(iOffset+2, true);
            }

        }

    }

    IPTC.readFromBinaryFile = function(oFile) {
        return findIPTCinJPEG(oFile);
    }

I'd like to suggest library exifr that works in both Node.js and browser. And it also supports the new HEIC image format.

exifr.parse(input, {iptc: true}).then(output => {
  console.log('IPTC', output)
})

It parses multiple data formats (TIFF/EXIF, ICC, IPTC, XMP, JFIF) but IPTC isn't enabled by default so you need to enabled it in options as seen in the example.

Well, this should get you going on creating your own javascript parser if you can't find a library that already does this.

http://www.iptc/std/photometadata/specification/IPTC-PhotoMetadata%28200907%29_1.pdf

发布评论

评论列表(0)

  1. 暂无评论