I've just finished downloading the libxml sources and documentation. Here's an excerpt that shows how libxml writes an XML document:

sum = 0; count = xmlOutputBufferWriteString(writer->out, "<?xml version="); if (count < 0) return -1; sum += count; count = xmlOutputBufferWrite(writer->out, 1, &writer->qchar); if (count < 0) return -1; sum += count; if (version != 0) count = xmlOutputBufferWriteString(writer->out, version); else count = xmlOutputBufferWriteString(writer->out, "1.0"); if (count < 0) return -1; sum += count; count = xmlOutputBufferWrite(writer->out, 1, &writer->qchar); if (count < 0) return -1; sum += count; if (writer->out->encoder != 0) { count = xmlOutputBufferWriteString(writer->out, " encoding="); if (count < 0) return -1; sum += count; count = xmlOutputBufferWrite(writer->out, 1, &writer->qchar); if (count < 0) return -1; sum += count; count = xmlOutputBufferWriteString(writer->out, writer->out->encoder->name); if (count < 0) return -1; sum += count; count = xmlOutputBufferWrite(writer->out, 1, &writer->qchar); if (count < 0) return -1; sum += count; } if (standalone != 0) { count = xmlOutputBufferWriteString(writer->out, " standalone="); if (count < 0) return -1; sum += count; count = xmlOutputBufferWrite(writer->out, 1, &writer->qchar); if (count < 0) return -1; sum += count; count = xmlOutputBufferWriteString(writer->out, standalone); if (count < 0) return -1; sum += count; count = xmlOutputBufferWrite(writer->out, 1, &writer->qchar); if (count < 0) return -1; sum += count; } count = xmlOutputBufferWriteString(writer->out, "?>\n"); if (count < 0) return -1; sum += count; return sum;

This is a buffered stream-based approach with strings. How fast is it? Believe me, it's faster than light!