radisk.js 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429
  1. ;(function(){
  2. function Radisk(opt){
  3. opt = opt || {};
  4. opt.log = opt.log || console.log;
  5. opt.file = String(opt.file || 'radata');
  6. opt.pack = opt.pack || (opt.memory? (opt.memory * 1000 * 1000) : 1399000000) * 0.3; // max_old_space_size defaults to 1400 MB.
  7. opt.until = opt.until || opt.wait || 9;
  8. opt.batch = opt.batch || 10 * 1000;
  9. opt.chunk = opt.chunk || (1024 * 1024 * 10); // 10MB
  10. opt.code = opt.code || {};
  11. opt.code.from = opt.code.from || '!';
  12. function ename(t){ return encodeURIComponent(t).replace(/\*/g, '%2A') }
  13. var map = Gun.obj.map;
  14. if(!opt.store){
  15. return opt.log("ERROR: Radisk needs `opt.store` interface with `{get: fn, put: fn (, list: fn)}`!");
  16. }
  17. if(!opt.store.put){
  18. return opt.log("ERROR: Radisk needs `store.put` interface with `(file, data, cb)`!");
  19. }
  20. if(!opt.store.get){
  21. return opt.log("ERROR: Radisk needs `store.get` interface with `(file, cb)`!");
  22. }
  23. if(!opt.store.list){
  24. //opt.log("WARNING: `store.list` interface might be needed!");
  25. }
  26. /*
  27. Any and all storage adapters should...
  28. 1. Because writing to disk takes time, we should batch data to disk. This improves performance, and reduces potential disk corruption.
  29. 2. If a batch exceeds a certain number of writes, we should immediately write to disk when physically possible. This caps total performance, but reduces potential loss.
  30. */
  31. var r = function(key, val, cb){
  32. key = ''+key;
  33. if(val instanceof Function){
  34. cb = val;
  35. val = r.batch(key);
  36. if(u !== val){
  37. // if a node is requested and some of it is cached... the other parts might not be.
  38. return cb(u, val);
  39. }
  40. if(r.thrash.at){
  41. val = r.thrash.at(key);
  42. if(u !== val){
  43. // if a node is requested and some of it is cached... the other parts might not be.
  44. return cb(u, val);
  45. }
  46. }
  47. return r.read(key, cb);
  48. }
  49. r.batch(key, val);
  50. if(cb){ r.batch.acks.push(cb) }
  51. if(++r.batch.ed >= opt.batch){ return r.thrash() } // (2)
  52. clearTimeout(r.batch.to); // (1)
  53. r.batch.to = setTimeout(r.thrash, opt.until || 1);
  54. }
  55. r.batch = Radix();
  56. r.batch.acks = [];
  57. r.batch.ed = 0;
  58. r.thrash = function(){
  59. var thrash = r.thrash;
  60. if(thrash.ing){ return thrash.more = true }
  61. thrash.more = false;
  62. thrash.ing = true;
  63. var batch = thrash.at = r.batch, i = 0;
  64. clearTimeout(r.batch.to);
  65. r.batch = null;
  66. r.batch = Radix();
  67. r.batch.acks = [];
  68. r.batch.ed = 0;
  69. r.save(batch, function(err, ok){
  70. if(++i > 1){ return }
  71. if(err){ opt.log('err', err) }
  72. map(batch.acks, function(cb){ cb(err, ok) });
  73. thrash.at = null;
  74. thrash.ing = false;
  75. if(thrash.more){ thrash() }
  76. });
  77. }
  78. /*
  79. 1. Find the first radix item in memory.
  80. 2. Use that as the starting index in the directory of files.
  81. 3. Find the first file that is lexically larger than it,
  82. 4. Read the previous file to that into memory
  83. 5. Scan through the in memory radix for all values lexically less than the limit.
  84. 6. Merge and write all of those to the in-memory file and back to disk.
  85. 7. If file to large, split. More details needed here.
  86. */
  87. r.save = function(rad, cb){
  88. var s = function Span(){};
  89. s.find = function(tree, key){
  90. if(key < s.start){ return }
  91. s.start = key;
  92. r.list(s.lex);
  93. return true;
  94. }
  95. s.lex = function(file){
  96. file = (u === file)? u : decodeURIComponent(file);
  97. if(!file || file > s.start){
  98. s.mix(s.file || opt.code.from, s.start, s.end = file);
  99. return true;
  100. }
  101. s.file = file;
  102. }
  103. s.mix = function(file, start, end){
  104. s.start = s.end = s.file = u;
  105. r.parse(file, function(err, disk){
  106. if(err){ return cb(err) }
  107. disk = disk || Radix();
  108. Radix.map(rad, function(val, key){
  109. if(key < start){ return }
  110. if(end && end < key){ return s.start = key }
  111. // PLUGIN: consider adding HAM as an extra layer of protection
  112. disk(key, val); // merge batch[key] -> disk[key]
  113. });
  114. r.write(file, disk, s.next);
  115. });
  116. }
  117. s.next = function(err, ok){
  118. if(s.err = err){ return cb(err) }
  119. if(s.start){ return Radix.map(rad, s.find) }
  120. cb(err, ok);
  121. }
  122. Radix.map(rad, s.find);
  123. }
  124. /*
  125. Any storage engine at some point will have to do a read in order to write.
  126. This is true of even systems that use an append only log, if they support updates.
  127. Therefore it is unavoidable that a read will have to happen,
  128. the question is just how long you delay it.
  129. */
  130. r.write = function(file, rad, cb, force){
  131. var f = function Fractal(){};
  132. f.text = '';
  133. f.count = 0;
  134. f.file = file;
  135. f.each = function(val, key, k, pre){
  136. if(u !== val){ f.count++ }
  137. if(opt.pack <= (val||'').length){ return cb("Record too big!"), true }
  138. var enc = Radisk.encode(pre.length) +'#'+ Radisk.encode(k) + (u === val? '' : '='+ Radisk.encode(val)) +'\n';
  139. if((opt.chunk < f.text.length + enc.length) && (1 < f.count) && !force){
  140. f.text = '';
  141. f.limit = Math.ceil(f.count/2);
  142. f.count = 0;
  143. f.sub = Radix();
  144. Radix.map(rad, f.slice)
  145. return true;
  146. }
  147. f.text += enc;
  148. }
  149. f.write = function(){
  150. var tmp = ename(file);
  151. opt.store.put(tmp, f.text, function(err){
  152. if(err){ return cb(err) }
  153. r.list.add(tmp, cb);
  154. });
  155. }
  156. f.slice = function(val, key){
  157. if(key < f.file){ return }
  158. if(f.limit < (++f.count)){
  159. var name = f.file;
  160. f.file = key;
  161. f.count = 0;
  162. r.write(name, f.sub, f.next, force);
  163. return true;
  164. }
  165. f.sub(key, val);
  166. }
  167. f.next = function(err){
  168. if(err){ return cb(err) }
  169. f.sub = Radix();
  170. if(!Radix.map(rad, f.slice)){
  171. r.write(f.file, f.sub, cb, force);
  172. }
  173. }
  174. if(!Radix.map(rad, f.each, true)){ f.write() }
  175. }
  176. ;(function(){
  177. var Q = {};
  178. r.read = function(key, cb, next){
  179. if(RAD && !next){ // cache
  180. var val = RAD(key);
  181. if(u !== val){
  182. // if a node is requested and some of it is cached... the other parts might not be.
  183. return cb(u, val);
  184. }
  185. }
  186. var g = function Get(){}, tmp;
  187. g.lex = function(file){
  188. file = (u === file)? u : decodeURIComponent(file);
  189. if(!file || file > (next || key)){
  190. if(next){ g.file = file }
  191. if(tmp = Q[g.file]){
  192. tmp.push({key: key, ack: cb, file: g.file});
  193. return true;
  194. }
  195. Q[g.file] = [{key: key, ack: cb, file: g.file}];
  196. r.parse(g.file, g.it);
  197. return true;
  198. }
  199. g.file = file;
  200. }
  201. g.it = function(err, disk){
  202. if(g.err = err){ opt.log('err', err) }
  203. if(disk){ RAD = g.disk = disk }
  204. disk = Q[g.file]; delete Q[g.file];
  205. map(disk, g.ack);
  206. }
  207. g.ack = function(as){
  208. if(!as.ack){ return }
  209. var tmp = as.key, rad = g.disk || noop, data = rad(tmp), last = rad.last;
  210. if(data){ as.ack(g.err, data) }
  211. else if(!as.file){ return as.ack(g.err, u) }
  212. if(!last || last === tmp){ return as.ack(g.err, u) } // is this correct?
  213. if(last > tmp && 0 > last.indexOf(tmp)){ return as.ack(g.err, u) }
  214. r.read(tmp, as.ack, as.file);
  215. }
  216. r.list(g.lex);
  217. }
  218. }());
  219. ;(function(){
  220. /*
  221. Let us start by assuming we are the only process that is
  222. changing the directory or bucket. Not because we do not want
  223. to be multi-process/machine, but because we want to experiment
  224. with how much performance and scale we can get out of only one.
  225. Then we can work on the harder problem of being multi-process.
  226. */
  227. var Q = {}, s = String.fromCharCode(31);
  228. r.parse = function(file, cb){ var q;
  229. if(q = Q[file]){ return q.push(cb) } q = Q[file] = [cb];
  230. var p = function Parse(){};
  231. p.disk = Radix();
  232. p.read = function(err, data){ var tmp;
  233. delete Q[file];
  234. if((p.err = err) || (p.not = !data)){
  235. //return cb(err, u);//map(q, p.ack);
  236. return map(q, p.ack);
  237. }
  238. if(typeof data !== 'string'){
  239. try{
  240. if(opt.pack <= data.length){
  241. p.err = "Chunk too big!";
  242. } else {
  243. data = data.toString();
  244. }
  245. }catch(e){ p.err = e }
  246. if(p.err){ return map(q, p.ack) }
  247. }
  248. var tmp = p.split(data), pre = [], i, k, v;
  249. while(tmp){
  250. k = v = u;
  251. i = tmp[1];
  252. tmp = p.split(tmp[2])||'';
  253. if('#' == tmp[0]){
  254. k = tmp[1];
  255. pre = pre.slice(0,i);
  256. if(i <= pre.length){
  257. pre.push(k);
  258. }
  259. }
  260. tmp = p.split(tmp[2])||'';
  261. if('\n' == tmp[0]){ continue }
  262. if('=' == tmp[0]){ v = tmp[1] }
  263. if(u !== k && u !== v){ p.disk(pre.join(''), v) }
  264. tmp = p.split(tmp[2]);
  265. }
  266. //cb(err, p.disk);
  267. map(q, p.ack);
  268. };
  269. p.split = function(t){
  270. if(!t){ return }
  271. var l = [], o = {}, i = -1, a = '', b, c;
  272. i = t.indexOf(s);
  273. if(!t[i]){ return }
  274. a = t.slice(0, i);
  275. l[0] = a;
  276. l[1] = b = Radisk.decode(t.slice(i), o);
  277. l[2] = t.slice(i + o.i);
  278. return l;
  279. }
  280. p.ack = function(cb){
  281. if(!cb){ return }
  282. if(p.err || p.not){ return cb(p.err, u) }
  283. cb(u, p.disk);
  284. }
  285. opt.store.get(ename(file), p.read);
  286. }
  287. }());
  288. ;(function(){
  289. var dir, q, f = String.fromCharCode(28), ef = ename(f);
  290. r.list = function(cb){
  291. if(dir){
  292. Radix.map(dir, function(val, key){
  293. return cb(key);
  294. }) || cb();
  295. return;
  296. }
  297. if(q){ return q.push(cb) } q = [cb];
  298. r.parse(f, r.list.init);
  299. }
  300. r.list.add = function(file, cb){
  301. var has = dir(file);
  302. if(has || file === ef){
  303. return cb(u, 1);
  304. }
  305. dir(file, true);
  306. r.write(f, dir, function(err, ok){
  307. if(err){ return cb(err) }
  308. cb(u, 1);
  309. }, true);
  310. }
  311. r.list.init = function(err, disk){
  312. if(err){
  313. opt.log('list', err);
  314. setTimeout(function(){ r.parse(f, r.list.init) }, 1000);
  315. return;
  316. }
  317. if(disk){
  318. r.list.drain(disk);
  319. return;
  320. }
  321. if(!opt.store.list){
  322. r.list.drain(Radix());
  323. return;
  324. }
  325. // import directory.
  326. opt.store.list(function(file){
  327. dir = dir || Radix();
  328. if(!file){ return r.list.drain(dir) }
  329. r.list.add(file, noop);
  330. });
  331. }
  332. r.list.drain = function(rad, tmp){
  333. r.list.dir = dir = rad;
  334. tmp = q; q = null;
  335. Gun.list.map(tmp, function(cb){
  336. Radix.map(dir, function(val, key){
  337. return cb(key);
  338. }) || cb();
  339. });
  340. }
  341. }());
  342. var noop = function(){}, RAD, u;
  343. return r;
  344. }
  345. ;(function(){
  346. var _ = String.fromCharCode(31), u;
  347. Radisk.encode = function(d, o, s){ s = s || _;
  348. var t = s, tmp;
  349. if(typeof d == 'string'){
  350. var i = d.indexOf(s);
  351. while(i != -1){ t += s; i = d.indexOf(s, i+1) }
  352. return t + '"' + d + s;
  353. } else
  354. if(d && d['#'] && (tmp = Gun.val.link.is(d))){
  355. return t + '#' + tmp + t;
  356. } else
  357. if(Gun.num.is(d)){
  358. return t + '+' + (d||0) + t;
  359. } else
  360. if(null === d){
  361. return t + ' ' + t;
  362. } else
  363. if(true === d){
  364. return t + '+' + t;
  365. } else
  366. if(false === d){
  367. return t + '-' + t;
  368. }// else
  369. //if(binary){}
  370. }
  371. Radisk.decode = function(t, o, s){ s = s || _;
  372. var d = '', i = -1, n = 0, c, p;
  373. if(s !== t[0]){ return }
  374. while(s === t[++i]){ ++n }
  375. p = t[c = n] || true;
  376. while(--n >= 0){ i = t.indexOf(s, i+1) }
  377. if(i == -1){ i = t.length }
  378. d = t.slice(c+1, i);
  379. if(o){ o.i = i+1 }
  380. if('"' === p){
  381. return d;
  382. } else
  383. if('#' === p){
  384. return Gun.val.link.ify(d);
  385. } else
  386. if('+' === p){
  387. if(0 === d.length){
  388. return true;
  389. }
  390. return parseFloat(d);
  391. } else
  392. if(' ' === p){
  393. return null;
  394. } else
  395. if('-' === p){
  396. return false;
  397. }
  398. }
  399. }());
  400. if(typeof window !== "undefined"){
  401. var Gun = window.Gun;
  402. var Radix = window.Radix;
  403. window.Radisk = Radisk;
  404. } else {
  405. var Gun = require('../gun');
  406. var Radix = require('./radix');
  407. try{ module.exports = Radisk }catch(e){}
  408. }
  409. Radisk.Radix = Radix;
  410. }());