te')); return $arr; }

/**
 * List the threads of one user.
 *
 * @param mixed  $uid      User ID; an empty value yields an empty array.
 * @param int    $page     Page number.
 * @param int    $pagesize Number of records per page.
 * @param bool   $desc     Sort direction: TRUE descending, FALSE ascending.
 * @param string $key      Column whose value is used as the key of the returned array.
 * @param array  $col      Columns to query.
 * @return mixed Result of thread_tid__find(), or an empty array when $uid is empty.
 */
function thread_tid_find_by_uid($uid, $page = 1, $pagesize = 1000, $desc = TRUE, $key = 'tid', $col = array()) {
	if (empty($uid)) return array();
	// -1 is passed for descending order on tid, 1 for ascending.
	$orderby = $desc ? -1 : 1;
	return thread_tid__find(array('uid' => $uid), array('tid' => $orderby), $page, $pagesize, $key, $col);
}

/**
 * List the thread ids under a forum. $fid may also be an array, e.g. array(1, 2, 3).
 *
 * Only the 'tid' and 'verify_date' columns are queried; rows are keyed by 'tid'.
 *
 * @param mixed $fid      Forum ID or array of forum IDs; an empty value yields an empty array.
 * @param int   $page     Page number.
 * @param int   $pagesize Number of records per page.
 * @param bool  $desc     Sort direction: TRUE descending, FALSE ascending.
 * @return mixed Result of thread_tid__find(), or an empty array when $fid is empty.
 */
function thread_tid_find_by_fid($fid, $page = 1, $pagesize = 1000, $desc = TRUE) {
	if (empty($fid)) return array();
	$orderby = $desc ? -1 : 1;
	return thread_tid__find(array('fid' => $fid), array('tid' => $orderby), $page, $pagesize, 'tid', array('tid', 'verify_date'));
}

/**
 * Delete the record(s) matching a thread id.
 *
 * @param mixed $tid Thread ID; an empty value returns FALSE without deleting anything.
 * @return mixed FALSE when $tid is empty, otherwise the result of thread_tid__delete().
 */
function thread_tid_delete($tid) {
	if (empty($tid)) return FALSE;
	return thread_tid__delete(array('tid' => $tid));
}

/**
 * Count all records.
 *
 * @return mixed Result of thread_tid__count() called without a condition.
 */
function thread_tid_count() {
	return thread_tid__count();
}

/**
 * Count the threads of one user.
 * Original note: with large data volumes, counting on a non-primary-key column should be avoided.
 *
 * @param mixed $uid User ID.
 * @return mixed Result of thread_tid__count() for the condition uid = $uid.
 */
function thread_uid_count($uid) {
	return thread_tid__count(array('uid' => $uid));
}

/**
 * Count the threads of one forum.
 * Original note: with large data volumes, counting on a non-primary-key column should be avoided.
 *
 * @param mixed $fid Forum ID.
 * @return mixed Result of thread_tid__count() for the condition fid = $fid.
 */
function thread_fid_count($fid) {
	return thread_tid__count(array('fid' => $fid));
}

?>html - How to find out charset of text file loaded by input[type="file"] in Javascript - Stack Overflow
最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

html - How to find out charset of text file loaded by input[type="file"] in Javascript - Stack Overflow

programmeradmin4浏览0评论

I want to read a user's file and give them back a modified version of it. I use an input of type file to get the text file, but how can I find out the charset of the loaded file? It can differ from case to case... The uploaded file has a .txt or similar format and isn't .html :)

// Element whose click starts the read, and the reader shared by both listeners.
var handler = document.getElementById('handler');
var reader = new FileReader();

// On click, read the first selected file as text.
// `firstSub` is not defined in this snippet — presumably the <input type="file"> element.
handler.addEventListener('click', function() {
    reader.readAsText(firstSub.files[0], /* the correct charset needs to go here */);
});

// Once the read has ended, print the result split into lines.
reader.addEventListener("loadend", function() {
    console.dir(reader.result.split('\n'));
});

I want to read a user's file and give them back a modified version of it. I use an input of type file to get the text file, but how can I find out the charset of the loaded file? It can differ from case to case... The uploaded file has a .txt or similar format and isn't .html :)

// Element whose click starts the read, and the reader shared by both listeners.
var handler = document.getElementById('handler');
var reader = new FileReader();

// On click, read the first selected file as text.
// `firstSub` is not defined in this snippet — presumably the <input type="file"> element.
handler.addEventListener('click', function() {
    reader.readAsText(firstSub.files[0], /* the correct charset needs to go here */);
});

// Once the read has ended, print the result split into lines.
reader.addEventListener("loadend", function() {
    console.dir(reader.result.split('\n'));
});
Share Improve this question edited Dec 20, 2017 at 21:24 Александр Копаевич asked Dec 20, 2017 at 21:19 Александр КопаевичАлександр Копаевич 1331 gold badge2 silver badges11 bronze badges 6
  • Will the uploaded document be an HTML document? If not, I don't think that you can get this information. – Scott Marcus Commented Dec 20, 2017 at 21:21
  • It isn't HTML document but maybe there are some methods or tricks to easily find out charset @ScottMarcus – Александр Копаевич Commented Dec 20, 2017 at 21:26
  • I don't think there is any way to get it. – Scott Marcus Commented Dec 20, 2017 at 21:27
  • Why not just omit the encoding part, just call reader.readAsText(firstSub.files[0]). It will assume UTF-8 by default, should work for most text files if preserving the original encoding isn't that important for you. – Chetan Jadhav CD Commented Dec 20, 2017 at 21:31
  • The fact of the matter that is very important to preserve the encoding @ChetanJadhavCD – Александр Копаевич Commented Dec 20, 2017 at 21:41
 |  Show 1 more comment

3 Answers 3

Reset to default 7

In my case (I made a small web app that accepts subtitle .srt files and removes time codes and line breaks, making a printable text), it was enough to foresee 2 types of encoding: UTF-8 and CP1251 (in all cases I tried – with both Latin and Cyrillic letters – these two types are enough). At first I try encoding with UTF-8, and if it is not successful, some characters are replaced by '�'-signs. So, I check the result for presence of these signs, and, if found, the procedure is repeated with CP1251 encoding. So, here is my code:

/**
 * Read the first file of a file input as text and put it into #textarea1,
 * preceded by a title line derived from the file name.
 *
 * The file is decoded as UTF-8 first. If the decoded text contains the
 * replacement character U+FFFD (the sign that decoding was not successful),
 * the read is repeated once with CP1251.
 *
 * @param {HTMLInputElement} inputDomElement the <input type="file"> element
 * @param {boolean} [utf8=true] true: decode as UTF-8 and allow the CP1251
 *     retry; false: decode as CP1251 and accept the result as is
 */
function onFileInputChange(inputDomElement, utf8 = true) {
    const file = inputDomElement.files[0];
    // No file selected (e.g. the picker was cancelled): nothing to read.
    if (!file) {
        return;
    }

    const reader = new FileReader();
    // Register the handler before the read is started.
    reader.onload = () => {
        const result = reader.result;
        // '\uFFFD' is written as an escape so the check survives re-encoding of this script.
        if (utf8 && result.includes('\uFFFD')) {
            console.log('The file encoding is not utf-8! Trying CP1251...');
            onFileInputChange(inputDomElement, false);
            return;
        }
        // Title: file name without a trailing .srt/.txt, underscores as spaces, upper-cased.
        const title = file.name.replace(/\.(srt|txt)$/, '').replace(/_+/g, ' ').toUpperCase();
        document.querySelector('#textarea1').value = title + '\n' + result;
    };
    reader.readAsText(file, utf8 ? 'UTF-8' : 'CP1251');
}

You should check out this library encoding.js

They also have a working demo. I would suggest you first try it out with the files that you'll typically work with to see if it detects the encoding correctly and then use the library in your project.

The other solutions didn't work for what I was trying to do, so I decided to create my own module that can detect the charset and language of text files.

You load it via the <script> tag and then use the languageEncoding function to retrieve the charset/encoding:

// index.html

<script src="https://unpkg.com/detect-file-encoding-and-language/umd/language-encoding.min.js"></script>
// app.js

languageEncoding(file).then(fileInfo => console.log(fileInfo));
// Possible result: { language: english, encoding: UTF-8, confidence: { language: 0.96, encoding: 1 } }

For a more complete example/instructions check out this part of the documentation!

发布评论

评论列表(0)

  1. 暂无评论