{"versions":{"1.1.1":{"name":"@agnai/sentencepiece-js","version":"1.1.1","description":"Sentencepiece tokenization for natural language processing, JS version.","main":"dist/index.js","publishConfig":{"access":"public"},"exports":{"imports":"./dist/index.js","default":"./dist/index.js"},"scripts":{"build":"./build.sh; rollup --config","test":"web-test-runner \"test/**/*.test.js\" \"src/**/*.test.js\" --node-resolve","test:watch":"web-test-runner \"test/**/*.test.js\" \"src/**/*.test.js\" --node-resolve --watch","develop":"web-dev-server --node-resolve --watch --open"},"repository":{"type":"git","url":"git+https://github.com/JanKaul/sentencepiece.git"},"keywords":["machine_learning","albert","nlp","sentencepiece"],"author":{"name":"sceuick, devilyouwei, Jan Kaul"},"license":"Apache-2.0","bugs":{"url":"https://github.com/JanKaul/sentencepiece/issues"},"homepage":"https://github.com/devilyouwei/sentencepiece","devDependencies":{"@esm-bundle/chai":"^4.3.4-fix.0","@rollup/plugin-commonjs":"^20.0.0","@rollup/plugin-node-resolve":"^13.0.4","@rollup/plugin-typescript":"^8.2.5","@rollup/plugin-wasm":"^5.1.2","@web/dev-server":"^0.1.22","@web/test-runner":"^0.13.16","rollup":"^2.56.2","tslib":"^2.3.1","typescript":"^4.4.3"},"dependencies":{"app-root-path":"^3.1.0"},"gitHead":"56c3dd2e640932362fcc4295b19b4196f400817a","_id":"@agnai/sentencepiece-js@1.1.1","_nodeVersion":"16.17.0","_npmVersion":"8.15.0","dist":{"integrity":"sha512-h2+XPrJVLuVLl+2+3iZPWcTw6Fs2NNulnxyh7LoI1hzHHib1wDC6KTmTrDJlLq7/lr5QFYpeMz2rlTFQrS0C0g==","shasum":"00a06c796c96fcc252d0adafb0c41b32fb313cec","tarball":"http://123.232.10.234:8212/nexus/content/groups/npm-public/@agnai/sentencepiece-js/-/sentencepiece-js-1.1.1.tgz","fileCount":4,"unpackedSize":774676,"signatures":[{"keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA","sig":"MEYCIQCqEDUOTSvQ6g+iAReRXaywewo3756nBaf5SGQI5FsNywIhAKvHz8dUTvm+lwMi9YVrkHXqLsMQjCLxSmDFhZPZ9ft8"}],"size":274952},"_npmUser":{"name":"anonymous","email":"sceuick@gmail.com"},"directories":{},"maintainers":[{"name":"anonymous","email":"sceuick@gmail.com"}],"_npmOperationalInternal":{"host":"s3://npm-registry-packages","tmp":"tmp/sentencepiece-js_1.1.1_1691903545753_0.4087734958005338"},"_hasShrinkwrap":false,"_cnpmcore_publish_time":"2023-08-13T05:12:26.009Z","publish_time":1691903546009,"_source_registry_name":"default","contributors":[]}},"dist-tags":{"latest":"1.1.1"},"name":"@agnai/sentencepiece-js","time":{"created":"2023-08-13T05:14:06.073Z","modified":"2023-08-13T05:14:06.420Z","1.1.1":"2023-08-13T05:12:26.009Z"},"readme":"# Javascript wrapper for the sentencepiece library\n\n## Build\n\nSentencepiece is compiled to webassembly using emscripten.\n\nTo rebuild this project\n\n```bash\n\nyarn\n\ngit clone https://github.com/google/sentencepiece.git\n\nyarn build\n\n```\n\n## Use\n\nTo use this tool\n\n```js\nconst { SentencePieceProcessor, cleanText } = require('../dist')\nconst ROOT = require('app-root-path')\n\nasync function main() {\n  let text = 'I am still waiting on my card?'\n  let cleaned = cleanText(text)\n\n  let spp = new SentencePieceProcessor()\n  await spp.load(`${ROOT}/test/30k-clean.model`)\n  let ids = spp.encodeIds(cleaned)\n  console.log(ids)\n  let str = spp.decodeIds(ids) // list ids->number\n  console.log(str)\n\n  let pieces = spp.encodePieces(cleaned) // list tokens->string\n  console.log(pieces)\n}\nmain()\n```\n\n## Note\n\n- sceuick removed `unhandledException` and `unhandledRejection` handlers that cause the unnecessary console dumps.\n- devilyouwei updated this repo to make this module support the js `require` keyword and added the using example.\n\n- 2023-1-10, devilyouwei added `encodePieces`.","users":{}}