1 /* $Id: download.cc,v 1.12 2003/09/16 23:32:10 atterer Exp $ -*- C++ -*-
3 |_) /| Copyright (C) 2001-2003 | richard@
4 | \/¯| Richard Atterer | atterer.net
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2. See
8 the file COPYING for details.
10 Download data from URL, write to output function, report on progress
12 This is the one and only file which accesses libwww directly.
24 # include <sys/utsname.h>
28 #include <download.hh>
32 #include <string-utf.hh>
33 //______________________________________________________________________
// Definition of the static member holding the User-Agent string. It is
// filled in once by Download::init() and attached to each request by the
// Download constructor.
35 string Download::userAgent;
// Project macro naming this file as the debug unit download.
// NOTE(review): presumably this is what provides the debug() calls used
// throughout the file - confirm against the macro definition.
37 DEBUG_UNIT("download")
// Logger object registered under the name libwww.
// NOTE(review): the lines around this definition are missing from the
// listing, so its users and any conditional compilation cannot be seen here.
41 Logger libwwwDebug("libwww");
// Trace callback installed with HTTrace_setCallback() in Download::init().
// Forwards the printf-style format string and argument list to stderr via
// vfprintf().
// NOTE(review): the remainder of the body (return statement, closing brace)
// is missing from this listing.
44 int tracer(const char* fmt, va_list args) {
45 vfprintf(stderr, fmt, args);
// Alert handler that Download::init() registers for the interactive alert
// opcodes HT_A_CONFIRM, HT_A_PROMPT, HT_A_SECRET and HT_A_USER_PW.
// NOTE(review): only the first line of the signature is visible; judging by
// the name the handler declines every such request, but the body is missing
// from this listing - confirm.
49 BOOL nonono(HTRequest*, HTAlertOpcode, int, const char*, void*,
// Global setup of libwww and its glib glue, plus construction of the
// User-Agent string shared by all Download objects.
// NOTE(review): this listing has lines missing (preprocessor conditionals,
// local declarations, closing braces); the comments below describe only the
// statements that are visible.
55 // Initialize (g)libwww
56 void Download::init() {
// Switch on libwww trace messages and route them through tracer().
// NOTE(review): the lines around these two calls are missing, presumably a
// debug-only conditional - confirm.
59 HTSetTraceMessageMask("flbtspuhox");
60 HTTrace_setCallback(tracer);
63 HTEventInit(); // Necessary on Windows to initialize WinSock
// Hand libwww a socket limit of 32.
64 HTNet_setMaxSocket(32);
66 /* These calls are necessary for redirections to work. (Why? Don't
67 ask why - this is libwww, after all...) */
68 HTList* converters = HTList_new();
69 HTConverterInit(converters); // Register the default set of converters
70 HTFormat_setConversion(converters); // Global converters for all requests
72 HTAlert_setInteractive(YES);
73 // HTPrint_setCallback(printer);
// Start the glib integration layer, passing jigdo and its version as the
// application identification.
74 glibwww_init("jigdo", JIGDO_VERSION);
76 HTAlert_add(Download::alertCallback, HT_A_PROGRESS); // Progress reports
// Every interactive alert kind (confirm, prompt, secret, user/password) is
// routed to nonono().
77 HTAlert_add(nonono, static_cast<HTAlertOpcode>(
78 HT_A_CONFIRM | HT_A_PROMPT | HT_A_SECRET | HT_A_USER_PW));
79 // To get notified of errors, redirects etc.
80 HTNet_addAfter(Download::afterFilter, NULL /*template*/, 0 /*param*/,
81 HT_ALL, HT_FILTER_MIDDLE);
// Ask libwww to use binary mode for FTP transfers.
83 HTFTP_setTransferMode(FTP_BINARY_TRANSFER_MODE);
85 //HTHost_setActivateRequestCallback(Download::activateRequestCallback);
// The User-Agent string is built only while it is still empty, so repeated
// calls of init() do not append to it again.
87 if (userAgent.empty()) {
88 userAgent = "jigdo/" JIGDO_VERSION;
// Windows variant: append the Windows flavour derived from GetVersionEx().
// NOTE(review): the declarations of info and s, the platform conditional and
// the code that appends s are missing from this listing.
90 userAgent += " (Windows";
92 memset(&info, 0, sizeof(OSVERSIONINFO));
93 info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
94 if (GetVersionEx(&info) != 0) {
// Windows 9x family: minor version below 10 is 95, below 90 is 98.
// NOTE(review): the fallback for larger minor versions is not visible.
96 if (info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) { // 95/98/Me
97 if (info.dwMinorVersion < 10) s = " 95";
98 else if (info.dwMinorVersion < 90) s = " 98";
// NT family: major version below 5 is NT; otherwise the minor version picks
// 2000 (0), XP (1) or 2003 (2).
100 } else if (info.dwPlatformId == VER_PLATFORM_WIN32_NT) { // NT/00/XP/03
101 if (info.dwMajorVersion < 5) s = " NT";
102 else if (info.dwMinorVersion == 0) s = " 2000";
103 else if (info.dwMinorVersion == 1) s = " XP";
104 else if (info.dwMinorVersion == 2) s = " 2003";
// Variant using uname(): on success append system name and release.
// NOTE(review): the declaration of ubuf and the surrounding punctuation of
// the string are missing from this listing.
112 if (uname(&ubuf) == 0) {
114 userAgent += ubuf.sysname; userAgent += ' '; userAgent += ubuf.release;
// Finally append the libwww version and log the finished string.
118 userAgent += " libwww/";
119 userAgent += HTLib_version();
120 debug("User-Agent: %1", userAgent);
123 //______________________________________________________________________
// Resolve the URI rel against base and store the result in *dest.
// When rel is already absolute (HTURL_isAbsolute) the first branch is taken.
// NOTE(review): that branch body is missing from this listing; presumably it
// copies rel unchanged - confirm.
// Otherwise HTParse() combines rel with base, and HTSimplify() is applied to
// the joined string before it is copied into *dest.
// NOTE(review): joined is a char* obtained from HTParse(); the statement
// that releases it is not visible here - confirm it exists.
125 void Download::uriJoin(string* dest, const string& base, const string& rel) {
126 if (HTURL_isAbsolute(rel.c_str())) {
129 char* joined = HTParse(rel.c_str(), base.c_str(), PARSE_ALL);
130 *dest = HTSimplify(&joined);
134 //______________________________________________________________________
// Map a libwww request back to its Download object. The Download constructor
// stores its own address as the request context via HTRequest_setContext().
// NOTE(review): Download::free() resets that context to 0, so the returned
// pointer can be null afterwards.
138 inline Download* getDownload(HTRequest* request) {
139 return static_cast<Download*>(HTRequest_context(request));
// Map a libwww stream back to its Download object. The Download constructor
// registers the Download itself (reinterpret_cast to HTStream*) as the output
// stream of the request, so the reverse cast recovers the object.
141 inline Download* getDownload(HTStream* stream) {
142 return reinterpret_cast<Download*>(stream);
146 //______________________________________________________________________
// Create a Download for uri that reports to the Output object o. Sets up the
// libwww HTRequest (context pointer, output stream, anchor, User-Agent
// header); the transfer itself is started by run().
// resumeChecked starts out true because no resume offset is set yet; run()
// clears it when a range request is issued.
148 Download::Download(const string& uri, Output* o)
149 : uriVal(uri), resumeOffsetVal(0), resumeChecked(true), currentSize(0),
150 outputVal(o), request(0), state(CREATED), stopLaterId(0),
151 insideNewData(false) {
// Table of the stream callbacks implemented by this class, in the order the
// HTStreamClass structure expects them. It is a function-local static, so
// one instance is shared by all Download objects.
152 static const HTStreamClass downloadWriter = {
153 "jigdoDownloadWriter", flush, free, abort, putChar, putString, write
155 vptr = &downloadWriter;
157 /* The code below (e.g. in putChar()) silently assumes that the first data
158 member's address of a Download object is identical to the object's
159 address. The C++ standard makes no guarantee about this. :-/ */
160 Assert(static_cast<void*>(this) == static_cast<void*>(&vptr));
161 request = HTRequest_new();
163 // Store within the HTRequest object a ptr to the corresponding Download
164 HTRequest_setContext(request, static_cast<void*>(this));
// The Download object doubles as the libwww output stream; getDownload()
// performs the reverse cast inside the stream callbacks.
166 HTStream* writer = reinterpret_cast<HTStream*>(this); // Shudder... :-)
167 HTRequest_setOutputFormat(request, WWW_SOURCE); // Raw data, no headers...
168 HTRequest_setOutputStream(request, writer); // is sent to writer
169 //HTRequest_setDebugStream(request, NULL); // body different from 200 OK
170 HTRequest_setAnchor(request, HTAnchor_findAddress(uriVal.c_str()));
172 // Remove libwww's User-Agent field and add our own
173 HTRequest_setRqHd(request,
174 static_cast<HTRqHd>(HTRequest_rqHd(request) & ~HT_C_USER_AGENT));
// const_cast is needed only because the libwww function takes a non-const
// char pointer.
175 HTRequest_addExtraHeader(request, "User-Agent",
176 const_cast<char*>(userAgent.c_str()));
178 //________________________________________
// Release the libwww request and cancel a pending stopLater_callback().
// The assertion documents that the object must not be destroyed while one of
// the stream callbacks is delivering data (insideNewData set).
180 Download::~Download() {
182 Assert(insideNewData == false);
// request may be 0; only a live request object is deleted.
185 if (request != 0) HTRequest_delete(request);
// Remove the idle source registered by stop(); its callback receives this
// object as its data pointer.
186 if (stopLaterId != 0) g_source_remove(stopLaterId);
188 //______________________________________________________________________
// Add or remove the no-cache pragma general header on the request.
// May only be called while no transfer is in progress, as checked by the
// Paranoid() condition on the state.
// NOTE(review): the if/else lines selecting between the two statements below
// are missing from this listing; presumably a true argument adds the header
// and false clears the flag - confirm.
190 void Download::setPragmaNoCache(bool pragmaNoCache) {
191 Paranoid(state == CREATED || failed() || succeeded() || interrupted());
192 // Force reload from originating server, bypassing proxies?
// Set the HT_G_PRAGMA_NO_CACHE general header.
194 HTRequest_addGnHd(request, HT_G_PRAGMA_NO_CACHE);
// Clear only the HT_G_PRAGMA_NO_CACHE bit, keeping all other general headers.
196 HTRequest_setGnHd(request, static_cast<HTGnHd>(HTRequest_gnHd(request)
197 & ~HT_G_PRAGMA_NO_CACHE));
200 /* Important: Our HTRequest object can be used several times - we must ensure
201 that any non-default settings (e.g. "Range" header) are reset before
// Start (or restart) the transfer. The byte counter is reset to the resume
// offset, any range left over from an earlier run is removed, and for a
// non-zero resume offset a new bytes range is requested. If HTLoad() reports
// failure right away, generateError() is called.
203 void Download::run() {
204 debug("run resumeOffset=%1", resumeOffset());
205 Assert(outputVal != 0); // Must have set up output
206 Paranoid(request != 0); // Don't call this after stop()
207 //Assert(destroyRequestId == 0); // No pending callback allowed from now on
// Data already present counts towards the size reported to the Output.
209 currentSize = resumeOffset();
211 // Shall we resume the download from a certain offset?
212 HTRequest_deleteRange(request); // Delete old range, if any
213 if (resumeOffset() > 0) {
214 /* TODO: If we contacted the host earlier, we could use
215 HTHost_isRangeUnitAcceptable() to check whether the host accepts range
218 // range can be "345-999" (both inclusive) or "345-"; offsets start at 0
// NOTE(review): the declaration of range and the statement completing the
// range string are missing from this listing.
220 append(range, resumeOffset());
222 HTRequest_addRange(request, "bytes", const_cast<char*>(range.c_str()));
223 /* A server can ignore the range for various reasons (unsupported,
224 requested offset outside file) - this can be detected by the
225 presence/absence of a content-range header in its answer (header
226 present and "206 Partial Content" <=> partial retrieval OK). Check
227 later whether Content-Range is present and correct. */
// The stream callbacks call resumeCheck() on the first data while this is
// false.
228 resumeChecked = false;
231 if (HTLoad(request, NO) == NO) generateError();
234 //______________________________________________________________________
236 /* Implementation for the libwww HTStream functionality - forwards the
237 calls to the Output object.
238 Return codes: HT_WOULD_BLOCK, HT_ERROR, HT_OK, >0 to pass back. */
// Stream callback free: logs the call and detaches the Download from the
// request by resetting the request context to 0, so getDownload(HTRequest*)
// no longer yields this object.
// NOTE(review): the return statement is missing from this listing.
240 int Download::free(HTStream* me) {
241 Download* self = getDownload(me);
242 debug("free %1", self);
243 HTRequest_setContext(self->request, 0);
// Must not be invoked while data is being handed to the Output object.
244 Assert(!self->insideNewData);
// Stream callback flush, variant that logs the call.
// NOTE(review): a second, parameter-name-less definition of flush() follows
// further down; the conditional-compilation lines choosing between the two
// (presumably a debug switch) and the return statement are missing from this
// listing - confirm.
248 int Download::flush(HTStream* me) {
249 debug("flush %1", getDownload(me));
// Stream callback abort, variant that logs the call. The HTList argument is
// unused.
// NOTE(review): a second definition of abort() follows further down; the
// conditional-compilation lines choosing between the two (presumably a debug
// switch) and the return statement are missing from this listing - confirm.
252 int Download::abort(HTStream* me, HTList*) {
253 debug("abort %1", getDownload(me));
// Stream callback flush, variant without logging: the stream argument is
// ignored and success is reported to libwww.
257 int Download::flush(HTStream*) {
  return HT_OK;
}
// Stream callback abort, variant without logging: both arguments are ignored
// and success is reported to libwww.
258 int Download::abort(HTStream*, HTList*) {
  return HT_OK;
}
260 //________________________________________
// Stream callback delivering a single byte c.
// Data arriving after stop() has scheduled stopLater_callback() is dropped
// (HT_OK is returned without touching any state). Otherwise the byte is
// counted and handed to the Output object; insideNewData is set for the
// duration of that hand-over.
// NOTE(review): the return statements after the resumeCheck() branch and at
// the end of the function are missing from this listing.
262 int Download::putChar(HTStream* me, char c) {
263 Download* self = getDownload(me);
264 if (self->stopLaterId != 0) return HT_OK;
266 self->insideNewData = true;
// First data after a range request: verify that the server honoured the
// offset. resumeCheck() returning true means the check failed.
267 if (!self->resumeChecked && self->resumeCheck()) {
268 self->insideNewData = false;
// A pause requested earlier takes effect now that data is flowing.
271 if (self->state == PAUSE_SCHEDULED) self->pauseNow();
272 self->currentSize += 1;
273 self->outputVal->download_data(reinterpret_cast<const byte*>(&c),
274 1, self->currentSize);
275 self->insideNewData = false;
// Stream callback delivering a NUL-terminated string s; its length is taken
// with strlen().
// Data arriving after stop() has scheduled stopLater_callback() is dropped
// (HT_OK is returned without touching any state). Otherwise the bytes are
// counted and handed to the Output object; insideNewData is set for the
// duration of that hand-over.
// NOTE(review): the return statements after the resumeCheck() branch and at
// the end of the function are missing from this listing.
278 int Download::putString(HTStream* me, const char* s) {
279 Download* self = getDownload(me);
280 if (self->stopLaterId != 0) return HT_OK;
281 self->insideNewData = true;
// First data after a range request: verify that the server honoured the
// offset. resumeCheck() returning true means the check failed.
282 if (!self->resumeChecked && self->resumeCheck()) {
283 self->insideNewData = false;
// A pause requested earlier takes effect now that data is flowing.
286 if (self->state == PAUSE_SCHEDULED) self->pauseNow();
287 size_t len = strlen(s);
288 self->currentSize += len;
289 self->outputVal->download_data(reinterpret_cast<const byte*>(s),
290 len, self->currentSize);
291 self->insideNewData = false;
// Stream callback delivering a block of l bytes starting at s.
// Data arriving after stop() has scheduled stopLater_callback() is dropped
// (HT_OK is returned). Otherwise the bytes are counted and handed to the
// Output object; insideNewData is set for the duration of that hand-over.
// NOTE(review): the return statements after the resumeCheck() branch and at
// the end of the function are missing from this listing and are left as they
// are in the file.
294 int Download::write(HTStream* me, const char* s, int l) {
295 Download* self = getDownload(me);
// Fix: test stopLaterId BEFORE raising insideNewData, exactly as putChar()
// and putString() do. Previously the flag was set first and stayed true on
// this early return, which trips the insideNewData assertions in
// stopLater_callback() and in the destructor.
296 if (self->stopLaterId != 0) return HT_OK;
297 self->insideNewData = true;
// First data after a range request: verify that the server honoured the
// offset. resumeCheck() returning true means the check failed.
298 if (!self->resumeChecked && self->resumeCheck()) {
299 self->insideNewData = false;
// A pause requested earlier takes effect now that data is flowing.
302 if (self->state == PAUSE_SCHEDULED) self->pauseNow();
303 size_t len = static_cast<size_t>(l);
304 self->currentSize += len;
305 self->outputVal->download_data(reinterpret_cast<const byte*>(s),
306 len, self->currentSize);
307 self->insideNewData = false;
310 //______________________________________________________________________
// Verify, on arrival of the first data after a range request, that the
// server really resumed at the requested offset.
// Returns false when the check is skipped (protocol id neither 80 nor 21);
// the stream callbacks treat a true result as failure and stop processing
// the data. The visible failure path logs the problem and reports the
// message Resume not supported by server through download_failed().
// NOTE(review): several lines are missing from this listing (declaration of
// startOff, prefix handling of the range value, the success return, the end
// of the do-block and state changes on failure); only the visible statements
// are described.
312 bool Download::resumeCheck() {
// Mark the check as done first so that it runs at most once per transfer.
313 resumeChecked = true;
315 HTNet* net = HTRequest_net(request);
316 unsigned protocol = HTProtocol_id(HTNet_protocol(net));
317 if (protocol != 80 && protocol != 21) return false;
318 // The check below only works for HTTP (and FTP in hacked libwww 5.4.0)
320 do { // Never loops, just to break out
// No range information in the response: fall through to the error path.
321 HTAssocList* ranges = HTResponse_range(HTRequest_response(request));
322 if (ranges == 0) break;
323 HTAssoc* r = static_cast<HTAssoc*>(HTAssocList_nextObject(ranges));
// Only the bytes range unit is understood.
325 if (strcmp(HTAssoc_name(r), "bytes") != 0) break;
326 const char* s = HTAssoc_value(r);
// Parse the leading decimal digits at s as the start offset of the range.
329 while (*s >= '0' && *s <= '9') startOff = startOff * 10 + (*s++ - '0');
330 debug("resumeCheck: resumeOffsetVal=%1, server offset=%2",
331 resumeOffset(), startOff);
// The server offset must equal the requested offset for the resume to be
// accepted.
332 if (startOff == resumeOffset())
336 // Error, resume not possible (e.g. because it's an HTTP 1.0 server)
337 debug("resumeCheck: Resume not supported");
340 string error = _("Resume not supported by server");
341 outputVal->download_failed(&error);
344 //______________________________________________________________________
// Progress alert handler registered for HT_A_PROGRESS in Download::init().
// Translates libwww progress opcodes into user-visible messages
// (download_message) and size updates (download_dataSize) on the Output
// object of the Download that owns the request.
// Returns NO when called without a request, YES otherwise.
// NOTE(review): the switch statement, several case labels and the
// declarations of host, info and len are missing from this listing.
// NOTE(review): getDownload() can yield a null pointer once Download::free()
// has reset the request context; no null check is visible here - confirm.
346 // Function which is called by libwww whenever anything happens for a request
347 BOOL Download::alertCallback(HTRequest* request, HTAlertOpcode op,
348 int /*msgnum*/, const char* /*dfault*/,
349 void* input, HTAlertPar* /*reply*/) {
350 if (request == 0) return NO;
351 // A Download object hides behind the output stream registered with libwww
352 Download* self = getDownload(request);
354 /* If state==ERROR, then output->error() has already been called - don't
355 send further info. */
356 if (self->state == ERROR) return YES;
// input, when present, is used as the host name in the messages below.
359 if (input != 0) host = static_cast<char*>(input);
// Read progress is not logged; every other opcode is.
361 if (op != HT_PROG_READ)
362 debug("Alert %1 for %2 obj %3", op, self->uri(), self);
367 info = subst(_("Looking up %L1"), host);
368 self->outputVal->download_message(&info);
370 case HT_PROG_CONNECT:
371 info = subst(_("Contacting %L1"), host);
372 self->outputVal->download_message(&info);
375 info = _("Logging in");
376 self->outputVal->download_message(&info);
379 // This used to be here. It doesn't work with 206 Partial Content
380 //long len = HTAnchor_length(HTRequest_anchor(request));
381 // This one is better
382 HTResponse* response = HTRequest_response(request);
384 if (response != 0) len = HTResponse_length(response);
// Report the total size as the resume offset plus the length announced in
// the response, unless the length is unknown (-1) or equals currentSize.
385 if (len != -1 && static_cast<uint64>(len) != self->currentSize)
386 self->outputVal->download_dataSize(self->resumeOffset() + len);
393 return YES; // Value only relevant for op == HT_A_CONFIRM
395 //______________________________________________________________________
// One entry of the libwww error table: numeric code, message text and an
// error type string (generateError() compares the type against client_error
// and server_error).
398 struct libwwwError { int code; const char* msg; const char* type; };
// Error table filled from the libwww macro HTERR_ENGLISH_INITIALIZER;
// generateError() indexes it with the value returned by HTError_index().
399 libwwwError libwwwErrors[] = { HTERR_ENGLISH_INITIALIZER };
// After-filter registered with HTNet_addAfter() in Download::init(); called
// with the final status of a request. It logs the status, reports success
// when the received size matches the announced length, and otherwise calls
// generateError(), using the INTERRUPTED state for dropped or timed-out
// connections.
// NOTE(review): the switch header, the conditions selecting the success and
// error paths, the declaration of len and all return statements are missing
// from this listing.
// NOTE(review): getDownload() can yield a null pointer once Download::free()
// has reset the request context; no null check is visible here - confirm.
402 int Download::afterFilter(HTRequest* request, HTResponse* /*response*/,
403 void* /*param*/, int status) {
404 Download* self = getDownload(request);
// Symbolic name of the status code, used only in the debug message below.
407 const char* msg = "";
409 case HT_ERROR: msg = " (HT_ERROR)"; break;
410 case HT_LOADED: msg = " (HT_LOADED)"; break;
411 case HT_PARTIAL_CONTENT: msg = " (HT_PARTIAL_CONTENT)"; break;
412 case HT_NO_DATA: msg = " (HT_NO_DATA)"; break;
413 case HT_NO_ACCESS: msg = " (HT_NO_ACCESS)"; break;
414 case HT_NO_PROXY_ACCESS: msg = " (HT_NO_PROXY_ACCESS)"; break;
415 case HT_RETRY: msg = " (HT_RETRY)"; break;
416 case HT_PERM_REDIRECT: msg = " (HT_PERM_REDIRECT)"; break;
417 case HT_TEMP_REDIRECT: msg = " (HT_TEMP_REDIRECT)"; break;
419 debug("Status %1%2 for %L3 obj %4", status, msg, self->uri(), self);
422 // Download finished, or server dropped connection on us
424 HTResponse* response = HTRequest_response(request);
426 if (response != 0) len = HTResponse_length(response);
// Success if the length is unknown (-1) or the announced length plus the
// resume offset equals the number of bytes counted so far.
427 if (len == -1 || (len + self->resumeOffset()) == self->currentSize) {
429 self->state = SUCCEEDED;
430 self->outputVal->download_succeeded();
435 // The connection dropped or there was a timeout
436 /* libwww returns just -1 (HT_ERROR) if I tear down the HTTP connection
437 very early, at a guess before the headers are transmitted completely.
438 Might want to add -1 in the if() below, but that's a very generic error
440 if (status >= 0 || status == HT_INTERRUPTED || status == HT_TIMEOUT) {
441 self->generateError(INTERRUPTED);
445 self->generateError();
448 //______________________________________________________________________
450 /* This is dirty, dirty - don't look... Unfortunately, the socket used for
451 FTP data connections isn't publicly accessible. */
// Local copy of the FTP state enumeration from the libwww source file
// HTFTP.c; it is needed only as the type of the state member of the ftp_ctrl
// structure copy declared after it.
// NOTE(review): several enumerators and the closing line of the typedef are
// missing from this listing.
455 // Taken from libwww, HTFTP.c
456 typedef enum _HTFTPState {
460 FTP_NEED_CCON, /* Control connection */
462 FTP_NEED_DCON, /* Data connection */
464 FTP_NEED_SERVER /* For directory listings */
// Local copy of the FTP control structure from the libwww source file
// HTFTP.c. pauseNow() and cont() cast the value of HTNet_context() to this
// type in order to reach the data connection.
// NOTE(review): presumably the member layout has to match the libwww version
// in use exactly, since the cast bypasses the library interface - confirm
// when upgrading libwww.
// NOTE(review): the leading members and the closing line of the typedef are
// missing from this listing.
467 // Taken from libwww, HTFTP.c
468 typedef struct _ftp_ctrl {
475 HTFTPState state; /* State of the connection */
476 int substate; /* For hierarchical states */
477 BOOL sent; /* Read or write command */
478 BOOL cwd; /* Done cwd */
479 BOOL reset; /* Expect greeting */
480 FTPServerType server; /* Type of server */
481 HTNet * cnet; /* Control connection */
482 HTNet * dnet; /* Data connection */
483 // This is the HTNet^^^^ that we need to access to find the socket! -- RA
484 BOOL alreadyLoggedIn;
// Called from the stream callbacks when state is PAUSE_SCHEDULED. Does
// nothing if the request no longer exists.
// NOTE(review): the state change, the FTP branch body and the
// conditional-compilation lines between the two unregister variants below
// are missing from this listing.
489 /* Pause download by removing the request's socket from the list of sockets
490 to call select() with. */
491 void Download::pauseNow() {
492 Paranoid(state == PAUSE_SCHEDULED);
494 if (request == 0) return;
496 /* The HTNet object whose socket we'll unregister from the event loop. This
497 will prevent more data from being delivered to it, effectively pausing
499 HTNet* net = HTRequest_net(request);
501 unsigned protocol = HTProtocol_id(HTNet_protocol(net));
// Protocol id 21 is treated as FTP.
502 if (protocol == 21) {
503 /* Protocol is FTP, which uses a control connection (which corresponds to
504 the main HTNet object) and a data connection. We need the HTNet object
// The request context of an FTP transfer is interpreted as the ftp_ctrl
// structure copied from libwww above.
506 ftp_ctrl* ctrl = static_cast<ftp_ctrl*>(HTNet_context(net));
// Variant that unregisters the READ event of the socket of the host channel.
511 HTChannel* channel = HTHost_channel(HTNet_host(net));
512 HTEvent_unregister(HTChannel_socket(channel), HTEvent_READ);
// Variant that disables the event timeout and unregisters the READ event of
// the socket belonging to net.
515 HTEvent_setTimeout(HTNet_event(net), -1); // No timeout for the socket
516 SOCKET socket = HTNet_socket(net);
517 HTEvent_unregister(socket, HTEvent_READ);
518 debug("pauseNow: unregistered socket %1, event %2, cbf %3",
519 int(socket), (void*)HTNet_event(net), (void*)HTNet_event(net)->cbf);
// Continue a paused download. A pause that was only scheduled is simply
// cancelled; a running download is left alone. Otherwise the READ event of
// the socket is registered again so that data is delivered once more.
// NOTE(review): the state change for a really paused download, the FTP
// branch body and the conditional-compilation lines between the two
// re-register variants below are missing from this listing.
523 // Analogous to pauseNow() above
524 void Download::cont() {
// Pause requested but not yet carried out: just withdraw the request.
525 if (state == PAUSE_SCHEDULED) state = RUNNING;
526 if (state == RUNNING) return;
529 if (request == 0) return;
531 HTNet* net = HTRequest_net(request);
532 unsigned protocol = HTProtocol_id(HTNet_protocol(net));
// Protocol id 21 is treated as FTP; its context is the ftp_ctrl structure.
533 if (protocol == 21) {
534 ftp_ctrl* ctrl = static_cast<ftp_ctrl*>(HTNet_context(net));
// Variant working on the host object: unregister, then register the READ
// event again.
539 HTHost* host = HTNet_host(net);
540 HTHost_unregister(host, net, HTEvent_READ);
541 HTHost_register(host, net, HTEvent_READ);
543 // Register socket again
544 /* For some weird reason the timeout gets reset to 0 somewhere, which
545 causes *immediate* timeouts with glibwww - fix that. */
546 HTEvent* event = HTNet_event(net);
547 HTEvent_setTimeout(event, HTHost_eventTimeout());
548 SOCKET socket = HTNet_socket(net);
549 HTEvent_register(socket, HTEvent_READ, event);
550 debug("cont: registered socket %1, event %2, cbf %3",
551 int(socket), (void*)event, (void*)event->cbf);
554 //______________________________________________________________________
// Abort the transfer. Has no effect when there is no request or the download
// already reached a final state (ERROR, INTERRUPTED, SUCCEEDED); otherwise
// the state becomes INTERRUPTED and the connection is torn down with
// HTNet_killPipe(), either right away or deferred to an idle callback.
// No Output callback is invoked from here (see the commented-out lines at
// the end).
// NOTE(review): the condition choosing between the deferred and the
// immediate path (presumably insideNewData) and the conditional-compilation
// lines around the two HTNet_killPipe() variants are missing from this
// listing - confirm.
556 void Download::stop() {
557 if (request == 0) return;
558 if (state == ERROR || state == INTERRUPTED || state == SUCCEEDED) return;
559 state = INTERRUPTED;//ERROR;//SUCCEEDED;
562 // Cannot call HTNet_killPipe() (sometimes segfaults), so do it later
// An idle callback is already pending; do not register a second one.
563 if (stopLaterId != 0) return;
564 stopLaterId = g_idle_add_full(G_PRIORITY_HIGH_IDLE, &stopLater_callback,
565 (gpointer)this, NULL);
566 Assert(stopLaterId != 0); // because we use 0 as a special value
// Immediate teardown, variant that logs the result.
569 int status = HTNet_killPipe(HTRequest_net(request));
570 debug("stop: HTNet_killPipe() returned %1", status);
// Immediate teardown, variant without logging.
572 HTNet_killPipe(HTRequest_net(request));
575 // None of this is really the right thing. Believe me, I tried both. ;-/
576 //string err = _("Download stopped");
577 //outputVal->download_failed(&err);
578 //outputVal->download_succeeded();
// Idle callback registered by stop() with the Download as its data pointer.
// Performs the deferred HTNet_killPipe(), clears stopLaterId so that the
// stream callbacks accept data again and stop() may schedule a new callback,
// and returns FALSE so that glib removes the idle source.
// NOTE(review): the conditional-compilation lines around the two
// HTNet_killPipe() variants are missing from this listing.
581 gboolean Download::stopLater_callback(gpointer data) {
582 Download* self = static_cast<Download*>(data);
// Must run outside the stream callbacks.
583 Assert(self->insideNewData == false);
// Variant that logs the result.
585 int status = HTNet_killPipe(HTRequest_net(self->request));
586 debug("stopLater_callback: HTNet_killPipe() returned %1", status);
// Variant without logging.
588 HTNet_killPipe(HTRequest_net(self->request));
590 self->stopLaterId = 0;
591 return FALSE; // "Don't call me again"
593 //______________________________________________________________________
// Build an error message from the error list of the request and report it
// through download_failed() on the Output object. Does nothing when the
// download already reached a final state (ERROR, INTERRUPTED, SUCCEEDED).
// For errors whose table type is client_error or server_error the numeric
// code is included in the message in addition to the text.
// NOTE(review): the declarations of err, errIndex and s, the assignment of
// newState to state, the handling of an invalid UTF-8 result and the end of
// the function are missing from this listing.
595 // Call output->error() with appropriate string taken from request object
596 /* If this is called, the Download is assumed to have failed in a
597 non-recoverable way. */
598 void Download::generateError(State newState) {
599 if (state == ERROR || state == INTERRUPTED || state == SUCCEEDED) return;
601 Assert(request != 0);
602 HTList* errList = HTRequest_error(request);
// Drain the error list; errIndex keeps the table index of the last entry
// that was removed.
605 while ((err = static_cast<HTError*>(HTList_removeFirstObject(errList)))) {
606 errIndex = HTError_index(err);
608 libwwwErrors[errIndex].code, libwwwErrors[errIndex].msg);
612 if (strcmp("client_error", libwwwErrors[errIndex].type) == 0
613 || strcmp("server_error", libwwwErrors[errIndex].type) == 0) {
614 // Include error code with HTTP errors
615 append(s, libwwwErrors[errIndex].code);
618 s += libwwwErrors[errIndex].msg;
621 /* libwww is not internationalized, so the string always ought to be UTF-8.
622 Oh well, check just to be sure. */
623 bool validUtf8 = g_utf8_validate(s.c_str(), s.length(), NULL);
627 outputVal->download_failed(&s);