radisk.js 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531
  1. ;(function(){
  2. function Radisk(opt){
  3. opt = opt || {};
  4. opt.log = opt.log || console.log;
  5. opt.file = String(opt.file || 'radata');
  6. var has = (Radisk.has || (Radisk.has = {}))[opt.file];
  7. if(has){ return has }
  8. opt.pack = opt.pack || (opt.memory? (opt.memory * 1000 * 1000) : 1399000000) * 0.3; // max_old_space_size defaults to 1400 MB.
  9. opt.until = opt.until || opt.wait || 250;
  10. opt.batch = opt.batch || (10 * 1000);
  11. opt.chunk = opt.chunk || (1024 * 1024 * 1); // 1MB
  12. opt.code = opt.code || {};
  13. opt.code.from = opt.code.from || '!';
  14. opt.jsonify = true;
  15. function ename(t){ return encodeURIComponent(t).replace(/\*/g, '%2A') }
  16. function atomic(v){ return u !== v && (!v || 'object' != typeof v) }
  17. var map = Gun.obj.map;
  18. var LOG = false;
  19. if(!opt.store){
  20. return opt.log("ERROR: Radisk needs `opt.store` interface with `{get: fn, put: fn (, list: fn)}`!");
  21. }
  22. if(!opt.store.put){
  23. return opt.log("ERROR: Radisk needs `store.put` interface with `(file, data, cb)`!");
  24. }
  25. if(!opt.store.get){
  26. return opt.log("ERROR: Radisk needs `store.get` interface with `(file, cb)`!");
  27. }
  28. if(!opt.store.list){
  29. //opt.log("WARNING: `store.list` interface might be needed!");
  30. }
  31. /*
  32. Any and all storage adapters should...
  33. 1. Because writing to disk takes time, we should batch data to disk. This improves performance, and reduces potential disk corruption.
  34. 2. If a batch exceeds a certain number of writes, we should immediately write to disk when physically possible. This caps total performance, but reduces potential loss.
  35. */
  36. var r = function(key, val, cb){
  37. key = ''+key;
  38. if(val instanceof Function){
  39. var o = cb || {};
  40. cb = val;
  41. var S; LOG && (S = +new Date);
  42. val = r.batch(key);
  43. LOG && console.log(+new Date - S, 'rad mem');
  44. if(u !== val){
  45. cb(u, r.range(val, o), o);
  46. if(atomic(val)){ return }
  47. // if a node is requested and some of it is cached... the other parts might not be.
  48. }
  49. if(r.thrash.at){
  50. val = r.thrash.at(key);
  51. if(u !== val){
  52. cb(u, r.range(val, o), o);
  53. if(atomic(val)){ cb(u, val, o); return }
  54. // if a node is requested and some of it is cached... the other parts might not be.
  55. }
  56. }
  57. return r.read(key, cb, o);
  58. }
  59. r.batch(key, val);
  60. if(cb){ r.batch.acks.push(cb) }
  61. if(++r.batch.ed >= opt.batch){ return r.thrash() } // (2)
  62. if(r.batch.to){ return }
  63. //clearTimeout(r.batch.to); // (1) // THIS LINE IS EVIL! NEVER USE IT! ALSO NEVER DELETE THIS SO WE NEVER MAKE THE SAME MISTAKE AGAIN!
  64. r.batch.to = setTimeout(r.thrash, opt.until || 1);
  65. }
  66. r.batch = Radix();
  67. r.batch.acks = [];
  68. r.batch.ed = 0;
  69. r.thrash = function(){
  70. var thrash = r.thrash;
  71. if(thrash.ing){ return thrash.more = true }
  72. thrash.more = false;
  73. thrash.ing = true;
  74. var batch = thrash.at = r.batch, i = 0;
  75. clearTimeout(r.batch.to);
  76. r.batch = null;
  77. r.batch = Radix();
  78. r.batch.acks = [];
  79. r.batch.ed = 0;
  80. //console.debug(99); var ID = Gun.text.random(2), S = (+new Date); console.log("[[[[[[[[", ID, batch.acks.length);
  81. r.save(batch, function(err, ok){
  82. if(++i > 1){ opt.log('RAD ERR: Radisk has callbacked multiple times, please report this as a BUG at github.com/amark/gun/issues ! ' + i); return }
  83. if(err){ opt.log('err', err) }
  84. //console.debug(99); var TMP; console.log("]]]]]]]]", ID, batch.acks.length, (TMP = +new Date) - S, 'more?', thrash.more);
  85. map(batch.acks, function(cb){ cb(err, ok) });
  86. //console.log("][", +new Date - TMP, thrash.more);
  87. thrash.at = null;
  88. thrash.ing = false;
  89. if(thrash.more){ thrash() }
  90. });
  91. }
  92. /*
  93. 1. Find the first radix item in memory.
  94. 2. Use that as the starting index in the directory of files.
  95. 3. Find the first file that is lexically larger than it,
  96. 4. Read the previous file to that into memory
  97. 5. Scan through the in memory radix for all values lexically less than the limit.
  98. 6. Merge and write all of those to the in-memory file and back to disk.
  99. 7. If file too large, split. More details needed here.
  100. */
  101. r.save = function(rad, cb){
  102. var s = function Span(){};
  103. s.find = function(tree, key){
  104. if(key < s.start){ return }
  105. s.start = key;
  106. r.list(s.lex);
  107. return true;
  108. }
  109. s.lex = function(file){
  110. file = (u === file)? u : decodeURIComponent(file);
  111. if(!file || file > s.start){
  112. s.mix(s.file || opt.code.from, s.start, s.end = file);
  113. return true;
  114. }
  115. s.file = file;
  116. }
  117. s.mix = function(file, start, end){
  118. s.start = s.end = s.file = u;
  119. r.parse(file, function(err, disk){
  120. if(err){ return cb(err) }
  121. disk = disk || Radix();
  122. Radix.map(rad, function(val, key){
  123. if(key < start){ return }
  124. if(end && end < key){ return s.start = key }
  125. // PLUGIN: consider adding HAM as an extra layer of protection
  126. disk(key, val); // merge batch[key] -> disk[key]
  127. });
  128. r.write(file, disk, s.next);
  129. });
  130. }
  131. s.next = function(err, ok){
  132. if(s.err = err){ return cb(err) }
  133. if(s.start){ return Radix.map(rad, s.find) }
  134. cb(err, ok);
  135. }
  136. Radix.map(rad, s.find);
  137. }
  138. /*
  139. Any storage engine at some point will have to do a read in order to write.
  140. This is true of even systems that use an append only log, if they support updates.
  141. Therefore it is unavoidable that a read will have to happen,
  142. the question is just how long you delay it.
  143. */
  144. r.write = function(file, rad, cb, o){
  145. o = ('object' == typeof o)? o : {force: o};
  146. var f = function Fractal(){};
  147. f.text = '';
  148. f.count = 0;
  149. f.file = file;
  150. f.each = function(val, key, k, pre){
  151. //console.log("RAD:::", JSON.stringify([val, key, k, pre]));
  152. if(u !== val){ f.count++ }
  153. if(opt.pack <= (val||'').length){ return cb("Record too big!"), true }
  154. var enc = Radisk.encode(pre.length) +'#'+ Radisk.encode(k) + (u === val? '' : ':'+ Radisk.encode(val)) +'\n';
  155. if((opt.chunk < f.text.length + enc.length) && (1 < f.count) && !o.force){
  156. f.text = '';
  157. f.limit = Math.ceil(f.count/2);
  158. f.count = 0;
  159. f.sub = Radix();
  160. Radix.map(rad, f.slice);
  161. return true;
  162. }
  163. f.text += enc;
  164. }
  165. f.write = function(){
  166. var tmp = ename(file);
  167. var start; LOG && (start = +new Date); // comment this out!
  168. opt.store.put(tmp, f.text, function(err){
  169. LOG && console.log("wrote to disk in", (+new Date) - start, tmp); // comment this out!
  170. if(err){ return cb(err) }
  171. r.list.add(tmp, cb);
  172. });
  173. }
  174. f.slice = function(val, key){
  175. if(key < f.file){ return }
  176. if(f.limit < (++f.count)){
  177. var name = f.file;
  178. f.file = key;
  179. f.count = 0;
  180. r.write(name, f.sub, f.next, o);
  181. return true;
  182. }
  183. f.sub(key, val);
  184. }
  185. f.next = function(err){
  186. if(err){ return cb(err) }
  187. f.sub = Radix();
  188. if(!Radix.map(rad, f.slice)){
  189. r.write(f.file, f.sub, cb, o);
  190. }
  191. }
  192. if(opt.jsonify){ return r.write.jsonify(f, file, rad, cb, o) } // temporary testing idea
  193. if(!Radix.map(rad, f.each, true)){ f.write() }
  194. }
  195. r.write.jsonify = function(f, file, rad, cb, o){
  196. var raw;
  197. var start; LOG && (start = +new Date); // comment this out!
  198. try{raw = JSON.stringify(rad.$);
  199. }catch(e){ return cb("Record too big!") }
  200. LOG && console.log("stringified JSON in", +new Date - start); // comment this out!
  201. if(opt.chunk < raw.length && !o.force){
  202. if(Radix.map(rad, f.each, true)){ return }
  203. }
  204. f.text = raw;
  205. f.write();
  206. }
  207. r.range = function(tree, o){
  208. if(!tree || !o){ return }
  209. if(u === o.start && u === o.end){ return tree }
  210. if(atomic(tree)){ return tree }
  211. var sub = Radix();
  212. Radix.map(tree, function(v,k){
  213. sub(k,v);
  214. }, o);
  215. return sub('');
  216. }
  217. ;(function(){
  218. var Q = {};
  219. r.read = function(key, cb, o){
  220. o = o || {};
  221. if(RAD && !o.next){ // cache
  222. var S; LOG && (S = +new Date);
  223. var val = RAD(key);
  224. LOG && console.log(+new Date - S, 'rad cached');
  225. //if(u !== val){
  226. //cb(u, val, o);
  227. if(atomic(val)){ cb(u, val, o); return }
  228. // if a node is requested and some of it is cached... the other parts might not be.
  229. //}
  230. }
  231. o.span = (u !== o.start) || (u !== o.end); // is there a start or end?
  232. var g = function Get(){};
  233. g.lex = function(file){ var tmp;
  234. file = (u === file)? u : decodeURIComponent(file);
  235. tmp = o.next || key || (o.reverse? o.end || '\uffff' : o.start || '');
  236. if(!file || (o.reverse? file < tmp : file > tmp)){
  237. LOG && console.log(+new Date - S, 'rad read lex'); S = +new Date;
  238. if(o.next || o.reverse){ g.file = file }
  239. if(tmp = Q[g.file]){
  240. tmp.push({key: key, ack: cb, file: g.file, opt: o});
  241. return true;
  242. }
  243. Q[g.file] = [{key: key, ack: cb, file: g.file, opt: o}];
  244. if(!g.file){
  245. g.it(null, u, {});
  246. return true;
  247. }
  248. r.parse(g.file, g.it);
  249. return true;
  250. }
  251. g.file = file;
  252. }
  253. g.it = function(err, disk, info){
  254. if(g.err = err){ opt.log('err', err) }
  255. g.info = info;
  256. if(disk){ RAD = g.disk = disk }
  257. disk = Q[g.file]; delete Q[g.file];
  258. LOG && console.log(+new Date - S, 'rad read it in, now ack to:', disk.length); S = +new Date;
  259. map(disk, g.ack);
  260. LOG && console.log(+new Date - S, 'rad read acked');
  261. }
  262. g.ack = function(as){
  263. if(!as.ack){ return }
  264. var key = as.key, o = as.opt, info = g.info, rad = g.disk || noop, data = r.range(rad(key), o), last = rad.last || Radix.map(rad, rev, revo);
  265. o.parsed = (o.parsed || 0) + (info.parsed||0);
  266. o.chunks = (o.chunks || 0) + 1;
  267. o.more = true;
  268. if((!as.file) // if no more places to look
  269. || (!o.span && last === key) // if our key exactly matches the very last atomic record
  270. || (!o.span && last && last > key && 0 != last.indexOf(key)) // 'zach' may be lexically larger than 'za', but there still might be more, like 'zane' in the 'za' prefix bucket so do not end here.
  271. ){
  272. o.more = u;
  273. as.ack(g.err, data, o);
  274. return
  275. }
  276. if(u !== data){
  277. as.ack(g.err, data, o); // more might be coming!
  278. if(o.parsed >= o.limit){ return } // even if more, we've hit our limit, asking peer will need to make a new ask with a new starting point.
  279. }
  280. o.next = as.file;
  281. r.read(key, as.ack, o);
  282. }
  283. if(o.reverse){ g.lex.reverse = true }
  284. LOG && (S = +new Date);
  285. r.list(g.lex);
  286. }
  287. function rev(a,b){ return b }
  288. var revo = {reverse: true};
  289. }());
  290. ;(function(){
  291. /*
  292. Let us start by assuming we are the only process that is
  293. changing the directory or bucket. Not because we do not want
  294. to be multi-process/machine, but because we want to experiment
  295. with how much performance and scale we can get out of only one.
  296. Then we can work on the harder problem of being multi-process.
  297. */
  298. var Q = {}, s = String.fromCharCode(31);
  299. r.parse = function(file, cb, raw){ var q;
  300. if(q = Q[file]){ return q.push(cb) } q = Q[file] = [cb];
  301. var p = function Parse(){}, info = {};
  302. p.disk = Radix();
  303. p.read = function(err, data){ var tmp;
  304. LOG && console.log('read disk in', +new Date - S, ename(file)); // keep this commented out in
  305. delete Q[file];
  306. if((p.err = err) || (p.not = !data)){
  307. return map(q, p.ack);
  308. }
  309. if(typeof data !== 'string'){
  310. try{
  311. if(opt.pack <= data.length){
  312. p.err = "Chunk too big!";
  313. } else {
  314. data = data.toString(); // If it crashes, it crashes here. How!?? We check size first!
  315. }
  316. }catch(e){ p.err = e }
  317. if(p.err){ return map(q, p.ack) }
  318. }
  319. info.parsed = data.length;
  320. LOG && (S = +new Date); // keep this commented out in production!
  321. if(opt.jsonify || '{' === data[0]){ // temporary testing idea
  322. try{
  323. var json = JSON.parse(data);
  324. p.disk.$ = json;
  325. LOG && console.log('parsed JSON in', +new Date - S); // keep this commented out in production!
  326. map(q, p.ack);
  327. return;
  328. }catch(e){ tmp = e }
  329. if('{' === data[0]){
  330. p.err = tmp || "JSON error!";
  331. return map(q, p.ack);
  332. }
  333. }
  334. LOG && (S = +new Date); // keep this commented out in production!
  335. var tmp = p.split(data), pre = [], i, k, v;
  336. if(!tmp || 0 !== tmp[1]){
  337. p.err = "File '"+file+"' does not have root radix! ";
  338. return map(q, p.ack);
  339. }
  340. while(tmp){
  341. k = v = u;
  342. i = tmp[1];
  343. tmp = p.split(tmp[2])||'';
  344. if('#' == tmp[0]){
  345. k = tmp[1];
  346. pre = pre.slice(0,i);
  347. if(i <= pre.length){
  348. pre.push(k);
  349. }
  350. }
  351. tmp = p.split(tmp[2])||'';
  352. if('\n' == tmp[0]){ continue }
  353. if('=' == tmp[0] || ':' == tmp[0]){ v = tmp[1] }
  354. if(u !== k && u !== v){ p.disk(pre.join(''), v) }
  355. tmp = p.split(tmp[2]);
  356. }
  357. LOG && console.log('parsed RAD in', +new Date - S); // keep this commented out in production!
  358. //cb(err, p.disk);
  359. map(q, p.ack);
  360. };
  361. p.split = function(t){
  362. if(!t){ return }
  363. var l = [], o = {}, i = -1, a = '', b, c;
  364. i = t.indexOf(s);
  365. if(!t[i]){ return }
  366. a = t.slice(0, i);
  367. l[0] = a;
  368. l[1] = b = Radisk.decode(t.slice(i), o);
  369. l[2] = t.slice(i + o.i);
  370. return l;
  371. }
  372. p.ack = function(cb){
  373. if(!cb){ return }
  374. if(p.err || p.not){ return cb(p.err, u, info) }
  375. cb(u, p.disk, info);
  376. }
  377. var S; LOG && (S = +new Date); // keep this commented out in production!
  378. if(raw){ return p.read(null, raw) }
  379. opt.store.get(ename(file), p.read);
  380. }
  381. }());
  382. ;(function(){
  383. var dir, q, f = String.fromCharCode(28), ef = ename(f);
  384. r.list = function(cb){
  385. if(dir){
  386. var tmp = {reverse: (cb.reverse)? 1 : 0};
  387. Radix.map(dir, function(val, key){
  388. return cb(key);
  389. }, tmp) || cb();
  390. return;
  391. }
  392. if(q){ return q.push(cb) } q = [cb];
  393. r.parse(f, r.list.init);
  394. }
  395. r.list.add = function(file, cb){
  396. var has = dir(file);
  397. if(has || file === ef){
  398. return cb(u, 1);
  399. }
  400. dir(file, true);
  401. cb.listed = (cb.listed || 0) + 1;
  402. r.write(f, dir, function(err, ok){
  403. if(err){ return cb(err) }
  404. cb.listed = (cb.listed || 0) - 1;
  405. if(cb.listed !== 0){ return }
  406. cb(u, 1);
  407. }, true);
  408. }
  409. r.list.init = function(err, disk){
  410. if(err){
  411. opt.log('list', err);
  412. setTimeout(function(){ r.parse(f, r.list.init) }, 1000);
  413. return;
  414. }
  415. if(disk){
  416. r.list.drain(disk);
  417. return;
  418. }
  419. if(!opt.store.list){
  420. r.list.drain(Radix());
  421. return;
  422. }
  423. // import directory.
  424. opt.store.list(function(file){
  425. dir = dir || Radix();
  426. if(!file){ return r.list.drain(dir) }
  427. r.list.add(file, noop);
  428. });
  429. }
  430. r.list.drain = function(rad, tmp){
  431. r.list.dir = dir = rad;
  432. tmp = q; q = null;
  433. Gun.list.map(tmp, function(cb){
  434. r.list(cb);
  435. });
  436. }
  437. }());
  438. var noop = function(){}, RAD, u;
  439. Radisk.has[opt.file] = r;
  440. return r;
  441. }
  442. ;(function(){
  443. var _ = String.fromCharCode(31), u;
  444. Radisk.encode = function(d, o, s){ s = s || _;
  445. var t = s, tmp;
  446. if(typeof d == 'string'){
  447. var i = d.indexOf(s);
  448. while(i != -1){ t += s; i = d.indexOf(s, i+1) }
  449. return t + '"' + d + s;
  450. } else
  451. if(d && d['#'] && (tmp = Gun.val.link.is(d))){
  452. return t + '#' + tmp + t;
  453. } else
  454. if(Gun.num.is(d)){
  455. return t + '+' + (d||0) + t;
  456. } else
  457. if(null === d){
  458. return t + ' ' + t;
  459. } else
  460. if(true === d){
  461. return t + '+' + t;
  462. } else
  463. if(false === d){
  464. return t + '-' + t;
  465. }// else
  466. //if(binary){}
  467. }
  468. Radisk.decode = function(t, o, s){ s = s || _;
  469. var d = '', i = -1, n = 0, c, p;
  470. if(s !== t[0]){ return }
  471. while(s === t[++i]){ ++n }
  472. p = t[c = n] || true;
  473. while(--n >= 0){ i = t.indexOf(s, i+1) }
  474. if(i == -1){ i = t.length }
  475. d = t.slice(c+1, i);
  476. if(o){ o.i = i+1 }
  477. if('"' === p){
  478. return d;
  479. } else
  480. if('#' === p){
  481. return Gun.val.link.ify(d);
  482. } else
  483. if('+' === p){
  484. if(0 === d.length){
  485. return true;
  486. }
  487. return parseFloat(d);
  488. } else
  489. if(' ' === p){
  490. return null;
  491. } else
  492. if('-' === p){
  493. return false;
  494. }
  495. }
  496. }());
  497. if(typeof window !== "undefined"){
  498. var Gun = window.Gun;
  499. var Radix = window.Radix;
  500. window.Radisk = Radisk;
  501. } else {
  502. var Gun = require('../gun');
  503. var Radix = require('./radix');
  504. //var Radix = require('./radix2'); Radisk = require('./radisk2');
  505. try{ module.exports = Radisk }catch(e){}
  506. }
  507. Radisk.Radix = Radix;
  508. }());