1 /* $Id: jigdo-io.cc,v 1.15 2004/06/20 20:35:15 atterer Exp $ -*- C++ -*-
3 |_) /| Copyright (C) 2003 | richard@
4 | \/¯| Richard Atterer | atterer.net
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2. See
8 the file COPYING for details.
10 IO object for downloads of .jigdo URLs; download, gunzip, interpret
18 #include <configfile.hh>
20 #include <jigdo-io.hh>
22 #include <makeimagedl.hh>
24 #include <mimestream.hh>
25 //#include <url-part.hh>
26 //______________________________________________________________________
28 DEBUG_UNIT("jigdo-io")
// File-local shorthand that forwards to ConfigFile::isWhitespace().
34 inline bool isWhitespace(char x) { return ConfigFile::isWhitespace(x); }
// File-local shorthand that forwards to ConfigFile::advanceWhitespace() for a
// const_iterator position. x is taken by reference and handed on unchanged;
// the bool result is returned as-is. Callers in this file treat a true result
// as "nothing usable is left on the line".
36 inline bool advanceWhitespace(string::const_iterator& x,
37 const string::const_iterator& end) {
38 return ConfigFile::advanceWhitespace(x, end);
// Same forwarding shorthand as the overload above, but for a mutable
// string::iterator position (the end bound is still a const_iterator).
41 inline bool advanceWhitespace(string::iterator& x,
42 const string::const_iterator& end) {
43 return ConfigFile::advanceWhitespace(x, end);
// Constructor for a JigdoIO without a parent: parent is set to 0 and
// rootAndImageSectionCandidate is initialised with this object itself.
// Stores the child download, the URL and the frontend IO; all line counters,
// tree links, templateMd5 and childFailedId start at 0 and the strings empty.
48 JigdoIO::JigdoIO(MakeImageDl::Child* c, const string& url,
49 DataSource::IO* frontendIo)
50 : childDl(c), urlVal(url), frontend(frontendIo), parent(0), includeLine(0),
51 firstChild(0), next(0), rootAndImageSectionCandidate(this), line(0),
52 section(), imageSectionLine(0), imageName(), imageInfo(),
53 imageShortInfo(), templateUrl(), templateMd5(0), childFailedId(0),
56 // Non-root, i.e. [Include]d object
// Records the including object (parentJigdo) and the line of the [Include]
// (inclLine), and takes rootAndImageSectionCandidate from parent->root().
// parent is dereferenced in the initializer list, so parentJigdo must not be
// null.
// NOTE(review): parent->root() reads the member `parent`, which relies on
// `parent` being declared before rootAndImageSectionCandidate in the class -
// confirm against the header.
57 JigdoIO::JigdoIO(MakeImageDl::Child* c, const string& url,
58 DataSource::IO* frontendIo, JigdoIO* parentJigdo,
60 : childDl(c), urlVal(url), frontend(frontendIo), parent(parentJigdo),
61 includeLine(inclLine), firstChild(0), next(0),
62 rootAndImageSectionCandidate(parent->root()), line(0), section(),
63 imageSectionLine(0), imageName(), imageInfo(), imageShortInfo(),
64 templateUrl(), templateMd5(0), childFailedId(0), gunzip(this) {
65 //debug("JigdoIO: Parent of %1 is %2", url, parent->urlVal);
67 //______________________________________________________________________
// NOTE(review): the header of this definition is not visible here; the
// comment in job_removeIo below refers to it as ~JigdoIO.
// A non-zero childFailedId means an idle callback registered with
// g_idle_add_full() is still pending: remove that source so it cannot fire
// on a destroyed object.
72 if (childFailedId != 0) {
73 g_source_remove(childFailedId);
// Deliver the notification the cancelled callback would have sent
// (presumably still inside the if above - confirm).
75 master()->childFailed(childDl, this, frontend);
78 /* Don't delete children; master will do this! If we deleted them here,
79 MakeImageDl::Child::childIoVal would be left dangling. */
80 // // Delete all our children
81 // JigdoIO* x = firstChild;
83 // JigdoIO* y = x->next;
// Take this object out of the source's io member and check that it is no
// longer the entry returned by io.get().
91 source()->io.remove(this);
92 Paranoid(source()->io.get() != this);
95 //______________________________________________________________________
// Handles a request to remove rmIo from the chain of IO objects.
// Returns a Job::IO*; the statements that return are not visible here.
97 Job::IO* JigdoIO::job_removeIo(Job::IO* rmIo) {
98 debug("job_removeIo %1", rmIo);
// First branch (its condition is not visible here): remembers the current
// frontend in a local.
100 // Do not "delete this" - this is called from ~JigdoIO above
101 DataSource::IO* c = frontend;
// Otherwise pass the request on to the frontend, if there is one. Whatever
// it returns becomes the new frontend; the Paranoid check verifies that a
// non-null result really is a DataSource::IO before the static_cast.
104 } else if (frontend != 0) {
105 Job::IO* c = frontend->job_removeIo(rmIo);
106 Paranoid(c == 0 || dynamic_cast<DataSource::IO*>(c) != 0);
107 debug("job_removeIo frontend=%1", c);
108 frontend = static_cast<DataSource::IO*>(c);
// Forwards the job_deleted notification to the frontend, if one is set.
113 void JigdoIO::job_deleted() {
114 if (frontend != 0) frontend->job_deleted();
115 // Do not "delete this" - childDl owns us
// Called when the download has completed. Handles a final line without a
// trailing newline, closes the current section, runs the end-of-file
// [Image] scan and then notifies the frontend and the master.
// Does nothing if this object has already failed.
118 void JigdoIO::job_succeeded() {
119 if (failed()) return;
// Bytes remain in gunzipBuf, i.e. the data did not end with a newline.
121 if (gunzip.nextOut() > gunzipBuf) {
122 debug("job_succeeded: No newline at end");
// The leftover bytes must be valid UTF-8 before they are used as a line.
124 const char* lineChars = reinterpret_cast<const char*>(gunzipBuf);
125 if (g_utf8_validate(lineChars, gunzip.nextOut()-gunzipBuf, NULL) != TRUE)
126 return generateError(_("Input .jigdo data is not valid UTF-8"));
// NOTE(review): this local string shadows the member `line` (the line
// counter initialised in the constructors) for the rest of its scope.
127 string line(lineChars, gunzip.nextOut() - gunzipBuf);
129 if (failed()) return;
// Complete the section that was open when the data ended.
132 if (sectionEnd().failed()) return;
134 XStatus st = imgSect_eof();
135 if (st.xfailed()) return;
136 if (frontend != 0) frontend->job_succeeded();
137 master()->childSucceeded(childDl, this, frontend);
// Must stay the last statement: per the comment, jigdoFinished() destroys
// this object.
138 if (st.returned(1)) master()->jigdoFinished(); // Causes "delete this"
// Called when the download of the .jigdo data failed. Passes the original
// message to the frontend, reports a generic error to the master, schedules
// the childFailed notification and marks this object as failed.
// Does nothing if this object has already failed.
141 void JigdoIO::job_failed(string* message) {
143 if (failed()) return;
144 if (frontend != 0) frontend->job_failed(message);
145 string err = _("Download of .jigdo file failed");
146 master()->generateError(&err);
147 /* We cannot call this right now:
148 master()->childFailed(childDl, this, frontend);
149 so schedule a callback to call it later. */
// The returned source id is kept in childFailedId so that the pending
// callback can be identified later.
150 childFailedId = g_idle_add_full(G_PRIORITY_HIGH_IDLE,&childFailed_callback,
151 (gpointer)this, NULL);
152 Paranoid(childFailedId != 0);
// assign("", 1) copies one char from the empty literal, i.e. its terminating
// '\0', so imageName becomes a one-character string. The Paranoid check
// right after expects failed() to be true from this point on.
153 imageName.assign("", 1); Paranoid(failed());
// Forwards a status message to the frontend, if one is set. Ignored once
// this object has failed.
156 void JigdoIO::job_message(string* message) {
157 if (failed()) return;
158 if (frontend != 0) frontend->job_message(message);
// Forwards the announced data size n to the frontend, if one is set. Ignored
// once this object has failed.
161 void JigdoIO::dataSource_dataSize(uint64 n) {
162 if (failed()) return;
163 if (frontend != 0) frontend->dataSource_dataSize(n);
// Receives a chunk of downloaded data. Unless this object has failed, the
// bytes are fed into the gunzip member; the chunk is also passed on to the
// frontend, if one is set.
166 void JigdoIO::dataSource_data(const byte* data, unsigned size,
167 uint64 currentSize) {
// After a failure, incoming data is only logged.
169 if (/*master()->finalState() ||*/ failed()) {
170 debug("Got %1 bytes, ignoring", size);
173 //Assert(master()->state() == MakeImageDl::DOWNLOADING_JIGDO);
174 debug("Got %1 bytes, processing", size);
176 gunzip.inject(data, size);
// An Error `e` (presumably caught from inject(); the try/catch lines are
// not visible here) is turned into a regular error report.
179 generateError(e.message);
182 if (frontend != 0) frontend->dataSource_data(data, size, currentSize);
184 //______________________________________________________________________
// No-op: nothing is done when this notification arrives.
186 void JigdoIO::gunzip_deleted() { }
// Supplies the decompressor with its output area: all of gunzipBuf
// (GUNZIP_BUF_SIZE bytes).
188 void JigdoIO::gunzip_needOut(Gunzip*) {
189 /* This is only called once, at the very start - afterwards, we always call
190 setOut() from gunzip_data, so Gunzip won't call this. */
191 gunzip.setOut(gunzipBuf, GUNZIP_BUF_SIZE);
194 /* Uncompressed data arrives. "decompressed" points somewhere inside
195 gunzipBuf. Split data apart at \n and interpret line(s), then copy any
196 remaining unfinished line to the start of gunzipBuf. The first byte of
197 gunzipBuf (if it contains valid data) is always the first char of a line
198 in the config file. */
// Throws Error for data that is not valid UTF-8 or that contains control
// characters other than tab (CR is rewritten to a space first).
199 void JigdoIO::gunzip_data(Gunzip*, byte* decompressed, unsigned size) {
200 if (failed()) return;
202 // Look for end of line.
// p scans only the newly arrived bytes; stringStart marks the start of the
// line currently being assembled, which may begin before `decompressed`.
203 byte* p = decompressed;
204 const byte* end = decompressed + size;
205 const byte* stringStart = gunzipBuf;
// A complete line [stringStart, p) is validated as UTF-8 and appended to
// `line` (a string declared on a line not visible here).
211 Paranoid(static_cast<unsigned>(p - stringStart) <= GUNZIP_BUF_SIZE);
212 Paranoid(line.empty());
213 const char* lineChars = reinterpret_cast<const char*>(stringStart);
214 if (g_utf8_validate(lineChars, p - stringStart, NULL) != TRUE)
215 throw Error(_("Input .jigdo data is not valid UTF-8"));
216 line.append(lineChars, p - stringStart);
218 if (failed()) return;
224 *p = ' '; // Allow Windows-style line endings by turning CR into space
225 else if (*p == 127 || (*p < 32 && *p != '\t')) // Check for evil chars
226 throw Error(_("Input .jigdo data contains invalid control characters"));
// The unfinished line starts at the buffer start and p has reached the
// buffer end, so there is no room left for more output.
// NOTE(review): cutting at a fixed byte offset can split a multi-byte UTF-8
// sequence, in which case the validation below rejects the input - confirm
// that this is acceptable.
230 if (stringStart == gunzipBuf && p == stringStart + GUNZIP_BUF_SIZE) {
231 // A single line fills the whole buffer. Truncate it at that length.
232 debug("gunzip_data: long line");
233 Paranoid(line.empty());
234 const char* lineChars = reinterpret_cast<const char*>(stringStart);
235 if (g_utf8_validate(lineChars, p - stringStart, NULL) != TRUE)
236 throw Error(_("Input .jigdo data is not valid UTF-8"));
237 line.append(lineChars, p - stringStart);
239 if (failed()) return;
240 // Trick: To ignore remainder of huge line, prepend a comment char '#'
// Output resumes at offset 1, leaving the first buffer byte for that '#'.
242 gunzip.setOut(gunzipBuf + 1, GUNZIP_BUF_SIZE - 1);
// len bytes of an unfinished line remain; move them to the buffer start
// (memmove, as the two ranges may overlap) and let the decompressor
// continue writing right behind them.
246 unsigned len = p - stringStart;
247 if (len > 0 && stringStart > gunzipBuf) {
248 // Unprocessed data left somewhere inside the buffer - copy to buf start
249 Assert(len < GUNZIP_BUF_SIZE); // Room must be left in the buffer
250 memmove(gunzipBuf, stringStart, len);
252 gunzip.setOut(gunzipBuf + len, GUNZIP_BUF_SIZE - len);
// Reports a decompression failure by throwing an Error built from *message.
// NOTE(review): presumably caught where dataSource_data() calls
// gunzip.inject() - confirm.
255 void JigdoIO::gunzip_failed(string* message) {
256 throw Error(*message, true);
258 //______________________________________________________________________
// Builds a message that adds location information to msg and hands it to
// generateError_plain(). `err` is declared on a line not visible here.
260 void JigdoIO::generateError(const string& msg) {
// Once finished() is true the message says "at end of"; otherwise it carries
// the current value of the member `line`.
262 const char* fmt = (finished() ?
263 _("%1 (at end of %3)") : _("%1 (line %2 in %3)"));
// "?" stands in for the location when source() is null.
264 err = subst(fmt, msg, line,
265 (source() != 0 ? source()->location().c_str() : "?") );
266 generateError_plain(&err);
// Overload of generateError() for a C string message; builds the same
// location-annotated text and hands it to generateError_plain().
// `err` is declared on a line not visible here.
269 void JigdoIO::generateError(const char* msg) {
// Once finished() is true the message says "at end of"; otherwise it carries
// the current value of the member `line`.
271 const char* fmt = (finished() ?
272 _("%1 (at end of %3)") : _("%1 (line %2 in %3)"));
// "?" stands in for the location when source() is null.
273 err = subst(fmt, msg, line,
274 (source() != 0 ? source()->location().c_str() : "?") );
275 generateError_plain(&err);
// Reports an error without adding location information: the detailed text
// goes to the frontend, a generic text goes to the master, and this object
// is marked as failed. Only the debug output happens if it has already
// failed.
// Note that *err is overwritten with the generic text, so the caller's
// string is modified.
278 void JigdoIO::generateError_plain(string* err) {
279 debug("generateError: %1", err);
281 if (failed()) return;
282 if (frontend != 0) frontend->job_failed(err);
283 *err = _("Error processing .jigdo file contents");
284 master()->generateError(err);
286 /* We cannot call this right now:
287 master()->childFailed(childDl, this, frontend);
288 so schedule a callback to call it later. */
// The returned source id is kept in childFailedId so that the pending
// callback can be identified later.
289 childFailedId = g_idle_add_full(G_PRIORITY_HIGH_IDLE,&childFailed_callback,
290 (gpointer)this, NULL);
291 Paranoid(childFailedId != 0);
// assign("", 1) copies one char from the empty literal, i.e. its terminating
// '\0', so imageName becomes a one-character string. The Paranoid check
// right after expects failed() to be true from this point on.
292 imageName.assign("", 1); Paranoid(failed());
// Idle callback registered by job_failed() and generateError_plain().
// data is the JigdoIO that failed. Delivers the deferred childFailed
// notification to the master and then calls jigdoFinished().
295 gboolean JigdoIO::childFailed_callback(gpointer data) {
296 JigdoIO* self = static_cast<JigdoIO*>(data);
297 debug("childFailed_callback for %1",
298 (self->source() != 0 ? self->source()->location().c_str() : "?") );
// Clear the id first: the callback is running, so nothing is pending.
299 self->childFailedId = 0;
300 self->master()->childFailed(self->childDl, self, self->frontend);
// Per the comment, self is destroyed by this call and must not be used
// afterwards.
301 self->master()->jigdoFinished(); // "delete self"
302 return FALSE; // "Don't call me again"
304 //______________________________________________________________________
306 // Finding the first [Image] section
308 /* While scanning the tree of [Include]d .jigdo files, only the first [Image]
309 section is relevant. IOW, we do a depth-first search of the tree. However,
310 the .jigdo files are downloaded in parallel, and we want to pass on the
311 image info as soon as possible. For this reason, we maintain an "image
312 section candidate pointer", one for the whole include tree.
314 If during the scanning of jigdo data we encounter an [Image] section AND
315 imgSectCandidate()==this, then that section is the first such section in
316 depth-first-order in the whole tree.
318 If instead we encounter an [Include], the included file /might/ contain an
319 image section, so we descend by setting imgSectCandidate() to the newly
320 created child download. However, it can turn out the child does not
321 actually contain an image section. In this case, we go back up to its parent.
324 This is where it gets more complicated: Of course, the parent's data
325 continued to be downloaded while we were wasting our time waiting for the
326 last lines of the child, to be sure those last lines didn't contain an
327 image section. After the point where we descended into the child, any
328 number of [Include]s and /maybe/ an [Image] somewhere in between the
329 [Include]s could have been downloaded. To find out whether this was the
330 case, a quick depth-first scan of the tree is now necessary, up to the
331 next point where we "hang" again because some .jigdo file has not been
332 downloaded completely.
334 The whole code is also used to find out when all JigdoIOs have finished -
335 this could be done in simpler ways just by counting the active ones, but
336 it comes "for free" with this code. */
338 // New child created due to [Include] in current .jigdo data
// Moves the image section candidate pointer to the new child, but only if
// this object is the current candidate and the master is not in its final
// state.
339 void JigdoIO::imgSect_newChild(JigdoIO* child) {
340 if (master()->finalState() || imgSectCandidate() != this) return;
341 debug("imgSect_newChild%1: From %2:%3 to child %4",
342 (master()->haveImageSection() ? "(haveImageSection)" : ""),
343 urlVal, line, child->urlVal);
344 setImgSectCandidate(child);
347 // An [Image] section just ended - maybe it was the first one?
// Only acts if this object is the current candidate and the master is not
// in its final state. In that case the parsed values are handed to the
// master, unless the master already has an image section.
348 void JigdoIO::imgSect_parsed() {
349 //debug("imgSect_parsed: %1 %2 %3", imgSectCandidate(), this, master()->finalState());
350 if (master()->finalState() || imgSectCandidate() != this) return;
351 debug("imgSect_parsed%1: %2:%3", (master()->haveImageSection()
352 ? "(haveImageSection)" : ""), urlVal, line - 1);
353 if (master()->haveImageSection()) return;
// Pointers to this object's members are passed; what the master does with
// them is not visible here.
354 master()->setImageSection(&imageName, &imageInfo, &imageShortInfo,
355 &templateUrl, &templateMd5);
// Debug helper: yields a C string chosen according to
// master->haveImageSection(). It is used as the first substitution in the
// imgSect_eof debug messages. The returned strings are not visible here.
360 inline const char* have(MakeImageDl* master) {
361 if (master->haveImageSection())
369 // The end of the file was hit
// Walks the include tree from this object's position to find out whether an
// [Image] section was seen in the meantime, handing the first one found to
// the master. The walk stops at a file that is not fully downloaded yet,
// which then becomes the new candidate. If the walk completes without any
// [Image] section, an error is generated.
// Returns OK immediately when the master is in its final state or this
// object is not the current candidate; the other return statements are not
// visible here.
370 XStatus JigdoIO::imgSect_eof() {
371 MakeImageDl* m = master();
372 if (m->finalState() || imgSectCandidate() != this) return OK;
374 JigdoIO* x = parent; // Current position in tree
375 int l = includeLine; // Line number in x, 0 if at start
376 JigdoIO* child = this; // child included at line l of x, null if l==0
// Indentation for the debug output: start at offset 40 of indentStr and move
// back two characters per ancestor level, clamped to the start of the
// string.
// NOTE(review): indentStr + 40 requires the literal to be at least 40
// characters long; the literal as it appears here is a single space -
// confirm against the real file.
380 const char* indentStr = " ";
381 const char* indent = indentStr + 40;
383 while (ii != 0) { indent -= 2; ii = ii->parent; }
384 if (indent < indentStr) indent = indentStr;
385 debug("imgSect_eof:%1%2Now at %3:%4", have(m), indent, x->urlVal, l);
// Next [Include] after position l in x: the first child when at the start
// of x, otherwise the sibling following `child`.
388 if (l == 0) nextChild = x->firstChild; else nextChild = child->next;
390 if (nextChild != 0) {
391 /* Before moving l to the line of the next [Include], check whether the
392 area of the file that l moves over contains an [Image] */
393 if (l < x->imageSectionLine
394 && x->imageSectionLine < nextChild->includeLine) {
395 debug("imgSect_eof:%1%2Found before [Include]", have(m), indent);
396 if (!m->haveImageSection())
397 m->setImageSection(&x->imageName, &x->imageInfo,
398 &x->imageShortInfo, &x->templateUrl, &x->templateMd5);
400 // No [Image] inbetween - move on, descend into [Include]
401 debug("imgSect_eof:%1%2Now at %3:%4, descending",
402 have(m), indent, x->urlVal, nextChild->includeLine);
409 // x has no more children - but maybe an [Image] at the end?
410 if (l < x->imageSectionLine) {
411 debug("imgSect_eof:%1%2Found after last [Include], if any",
413 if (!m->haveImageSection())
414 m->setImageSection(&x->imageName, &x->imageInfo,
415 &x->imageShortInfo, &x->templateUrl, &x->templateMd5);
418 // Nothing found. If x not yet fully downloaded, stop here
419 if (!x->finished()) {
420 debug("imgSect_eof:%1%2Waiting for %3 to download",
421 have(m), indent, x->urlVal);
422 setImgSectCandidate(x);
426 // Nothing found and finished - go back up in tree
427 debug("imgSect_eof:%1%2Now at end of %3, ascending",
428 have(m), indent, x->urlVal);
// End of the walk: success if the master has an image section by now,
// otherwise report that none was found.
433 if (m->haveImageSection()) {
434 debug("imgSect_eof: Finished");
437 generateError(_("No `[Image]' section found in .jigdo data"));
441 //______________________________________________________________________
443 // New line of jigdo data arrived. This is similar to ConfigFile::rescan()
// Parses one line: either a "Label=Value" entry, which is passed to entry(),
// or a "[Section]" header, which first completes the previous section via
// sectionEnd(). Errors are reported through generateError().
// `s` (the line text being parsed) is set up on lines not visible here.
444 void JigdoIO::jigdoLine(string* l) {
445 //debug("\"%1\"", l);
448 if (failed()) return;
452 string::const_iterator x = s.begin(), end = s.end();
453 // Empty line, or only contains '#' comment
454 if (advanceWhitespace(x, end)) return;
// Entries inside a [Comment] / [comment] section are skipped below.
456 bool inComment = (section == "Comment" || section == "comment");
458 // This is a "Label=Value" line
459 if (inComment) return;
// Collect the label: everything up to the first whitespace or '='.
// NOTE(review): this loop never compares x against end. For a line that
// contains neither whitespace nor '=' after the label, confirm that it
// cannot dereference x at or beyond end.
461 while (!isWhitespace(*x) && *x != '=') { labelName += *x; ++x; }
462 if (advanceWhitespace(x, end) || *x != '=')
463 return generateError(_("No `=' after first word"));
// Skip whitespace in front of the value; entry() receives the whole line
// plus the offset at which the value starts.
465 advanceWhitespace(x, end);
466 // vector<string> value;
467 // ConfigFile::split(value, s, x - s.begin());
468 // entry(&labelName, &value);
469 entry(&labelName, &s, x - s.begin());
472 //____________________
474 // This is a "[Section]" line
475 if (sectionEnd().failed()) return;
476 ++x; // Advance beyond the '['
477 if (advanceWhitespace(x, end)) // Skip space after '['
478 return generateError(_("No closing `]' for section name"));
479 string::const_iterator s1 = x; // s1 points to start of section name
// The name ends at ']', whitespace, '[', '=' or '#'.
480 while (x != end && *x != ']' && !isWhitespace(*x) && *x != '['
481 && *x != '=' && *x != '#') ++x;
482 string::const_iterator s2 = x; // s2 points to end of section name
483 if (advanceWhitespace(x, end))
484 return generateError(_("No closing `]' for section name"));
485 section.assign(s1, s2);
486 //debug("Section `%1'", section);
488 // In special case of "Image", ignore 2nd and subsequent sections
// The first [Image] records its line number; the "(ignored)" suffix
// (presumably applied to later ones only) keeps the section == "Image"
// checks elsewhere from matching.
489 if (section == "Image") {
490 if (imageSectionLine == 0)
491 imageSectionLine = line;
493 section += "(ignored)";
495 // In special case of "Include", format differs: URL after section name
496 if (section == "Include") {
// Take everything up to ']' as the URL, then scan backwards over trailing
// whitespace.
498 while (x != end && *x != ']') { url += *x; ++x; }
500 while (i > 0 && isWhitespace(url[--i])) { }
505 return generateError(_("Section name invalid"));
506 ++x; // Advance beyond the ']'
// Anything other than whitespace or a comment after ']' is an error.
507 if (!advanceWhitespace(x, end))
508 return generateError(_("Invalid characters after closing `]'"));
510 //______________________________________________________________________
// Called when the current section ends. Only [Image] needs work: it must
// have supplied Filename, Template and Template-MD5Sum. Returns OK for any
// other section; the remaining return statements are not visible here.
512 Status JigdoIO::sectionEnd() {
513 if (section != "Image") return OK;
514 // Section that just ended was [Image]
// Each check overwrites valueName, so when several values are missing the
// last check that matches (Filename, then Template, then Template-MD5Sum)
// is the one named in the message.
515 const char* valueName = 0;
516 if (templateMd5 == 0) valueName = "Template-MD5Sum";
517 if (templateUrl.empty()) valueName = "Template";
518 if (imageName.empty()) valueName = "Filename";
// Nothing missing: the section is complete.
519 if (valueName == 0) {
523 // Error: Not all required fields found
525 string s = subst(_("`%1=...' line missing in [Image] section"), valueName);
529 //______________________________________________________________________
531 // "[Include url]" found - add
// Resolves *url against this file's own URL, rejects include loops, then
// creates a child download with its own JigdoIO, appends it to this object's
// list of children and starts it.
532 void JigdoIO::include(string* url) {
534 Download::uriJoin(&includeUrl, urlVal, *url);
535 debug("%1:[Include %2]", line, includeUrl);
539 //debug("include: Parent of %1 is %2", p->urlVal,
540 // (p->parent ? p->parent->urlVal : "none"));
// p is a JigdoIO whose URL is compared with the new one (presumably each
// ancestor in turn; the loop is not visible here). A match means the file
// would include itself.
541 if (p->urlVal == includeUrl)
542 return generateError(_("Loop of [Include] directives"));
// NOTE(review): the locals `childDl` and, further down, `frontend` shadow
// the members of the same name. std::auto_ptr is deprecated since C++11 and
// removed in C++17.
547 auto_ptr<MakeImageDl::Child> childDl(
548 master()->childFor(includeUrl, 0, &leafname));
549 if (childDl.get() != 0) {
550 MakeImageDl::IO* mio = master()->io.get();
551 string info = _("Retrieving .jigdo data");
552 string destDesc = subst(Job::MakeImageDl::destDescTemplate(),
554 auto_ptr<DataSource::IO> frontend(0);
556 frontend.reset(mio->makeImageDl_new(childDl->source(), includeUrl,
558 JigdoIO* jio = new JigdoIO(childDl.get(), includeUrl, frontend.get(),
560 childDl->setChildIo(jio);
562 if (mio != 0) mio->job_message(&info);
// Walk to the end of the singly linked child list; jiop ends up pointing at
// the null link where the new child belongs.
565 JigdoIO** jiop = &firstChild;
566 while (*jiop != 0) jiop = &(*jiop)->next;
569 imgSect_newChild(jio);
// release() gives up the auto_ptr's ownership before the download is
// started.
571 (childDl.release())->source()->run();
574 //______________________________________________________________________
577 // For Base64In - put decoded bytes into 16-byte array
// NOTE(review): the struct header is not visible here; the name ArrayOut is
// taken from the typedef and from result().
579 typedef ArrayOut& ResultType;
// Points the writer at a 16-byte target: cur is the next write position,
// end is one past the last byte.
581 void set(byte* array) { cur = array; end = array + 16; }
// Stores one byte. A byte arriving when the array is already full sets both
// pointers to 0, which entry() detects with its cur == 0 test. After that
// cur == end stays true, so further bytes are dropped as well.
582 void put(byte b) { if (cur == end) cur = end = 0; else *cur++ = b; }
583 ArrayOut& result() { return *this; }
584 byte* cur; byte* end;
587 //____________________
589 /* @param label Pointer to word before the '='
590 @param data Pointer to string containing whole input line
591 @param valueOff Offset of value (part after '=') in data */
// Interprets one "Label=Value" line according to the current section.
// Errors are reported through generateError().
592 void JigdoIO::entry(string* label, string* data, unsigned valueOff) {
// Split the value part of the line into words.
593 vector<string> value;
594 ConfigFile::split(value, *data, valueOff);
// Build a ">word< >word< " rendering of the words for the debug output.
597 for (vector<string>::iterator i = value.begin(), e = value.end();
598 i != e; ++i) { s += '>'; s += *i; s += "< "; }
599 // { s += ConfigFile::quote(*i); s += ' '; }
600 debug("%1:[%2] %3=%4", line, section, label, s);
602 //____________________
// Label=Value lines are not allowed inside [Include].
604 if (section == "Include") {
606 return generateError(_("A new section must be started after [Include]"));
607 //____________________
609 } else if (section == "Jigdo") {
610 if (*label == "Version") {
611 if (value.empty()) return generateError(_("Missing argument"));
// Parse the leading decimal digits of the first word into ver.
// NOTE(review): a very long digit string could overflow ver - confirm its
// type, which is declared on a line not visible here.
613 string::const_iterator i = value.front().begin();
614 string::const_iterator e = value.front().end();
615 while (i != e && *i >= '0' && *i <= '9') {
616 ver = 10 * ver + *i - '0';
619 if (ver > SUPPORTED_FORMAT)
620 return generateError(_("Upgrade required - this .jigdo file needs "
621 "a newer version of the jigdo program"));
623 //____________________
625 } else if (section == "Image") {
627 /* Only called for first [Image] section in file - for further sections,
628 section=="Image(ignored)". Does some sanity checks on the supplied
630 if (*label == "Filename") {
631 if (!imageName.empty()) return generateError(_("Value redefined"));
632 if (value.empty()) return generateError(_("Missing argument"));
633 // Only use leaf name, ignore dirname delimiters, max 100 chars
// NOTE(review): rfind() returns npos when the character is absent, and npos
// is the largest size_type value. If DIRSEP differs from '/' and only one of
// the two separators occurs, lastSep ends up as npos, lastSep + 1 wraps to 0
// and the directory part is kept - confirm.
634 string::size_type lastSlash = value.front().rfind('/');
635 string::size_type lastSep = value.front().rfind(DIRSEP);
636 if (lastSlash > lastSep) lastSep = lastSlash;
637 imageName.assign(value.front(), lastSep + 1, 100);
638 if (imageName.empty()) return generateError(_("Invalid image name"));
639 } else if (*label == "Template") {
640 if (!templateUrl.empty()) return generateError(_("Value redefined"));
641 if (value.empty()) return generateError(_("Missing argument"));
642 templateUrl = value.front();
643 } else if (*label == "Template-MD5Sum") {
644 if (templateMd5 != 0) return generateError(_("Value redefined"));
645 if (value.empty()) return generateError(_("Missing argument"));
646 templateMd5 = new MD5();
647 // Helper class places decoded bytes into MD5 object
648 Base64In<ArrayOut> decoder;
649 decoder.result().set(templateMd5->sum);
650 decoder << value.front();
// cur == 0 means more than 16 bytes were decoded; cur != end means fewer
// than 16.
651 if (decoder.result().cur == 0
652 || decoder.result().cur != decoder.result().end) {
653 delete templateMd5; templateMd5 = 0;
654 return generateError(_("Invalid Template-MD5Sum argument"));
656 // For security, double-check the value
// Re-encode the decoded bytes; the result must match the input text exactly.
// NOTE(review): unlike the error path above, this one leaves templateMd5
// allocated and non-null - confirm whether that is intended.
658 b64.write(templateMd5->sum, 16).flush();
659 if (b64.result() != value.front()) {
660 debug("b64='%1' value='%2'", b64.result(), value.front());
661 return generateError(_("Invalid Template-MD5Sum argument"));
663 } else if (*label == "ShortInfo") {
664 // ShortInfo is 200 chars max
// Taken from the raw line rather than from the split words.
665 if(!imageShortInfo.empty()) return generateError(_("Value redefined"));
666 imageShortInfo.assign(*data, valueOff, 200);
667 } else if (*label == "Info") {
668 // ImageInfo is 5000 chars max
// Taken from the raw line rather than from the split words.
669 if (!imageInfo.empty()) return generateError(_("Value redefined"));
670 imageInfo.assign(*data, valueOff, 5000);
// In [Parts] the label itself is the base64 MD5 sum (see the comparison with
// *label below) and the first word of the value is what it maps to.
673 } else if (section == "Parts") {
676 if (value.empty()) return generateError(_("Missing argument"));
678 Base64In<ArrayOut> decoder;
679 decoder.result().set(md5.sum);
// cur == 0 means more than 16 bytes were decoded; cur != end means fewer
// than 16.
681 if (decoder.result().cur == 0
682 || decoder.result().cur != decoder.result().end) {
683 return generateError(_("Invalid MD5Sum in Parts section"));
685 // For security, double-check the value
687 b64.write(md5.sum, 16).flush();
688 if (b64.result() != *label) {
689 debug("x b64='%1' value='%2'", b64.result(), *label);
690 return generateError(_("Invalid MD5Sum in Parts section"));
692 debug("PART %1 -> %2", md5.toString(), value.front());
693 } // endif (section == "Something")