1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
use soft_ascii_string::{ SoftAsciiChar, SoftAsciiString };
use { quoted_printable as extern_quoted_printable };

use failure::Fail;
use ::error::{EncodingError, EncodingErrorKind};
use super::encoded_word::EncodedWordWriter;

/// Encodes `data` as quoted-printable text.
///
/// The result is meant to be used as a content transfer encoding; it is
/// _not_ suited as the payload of an encoded word (see
/// `encoded_word_encode` for that use case).
pub fn normal_encode<A: AsRef<[u8]>>(data: A) -> SoftAsciiString {
    // NOTE(review): the encoder output is wrapped without an ascii check,
    // presumably because `encode_to_str` only ever emits ascii; confirm.
    SoftAsciiString::from_unchecked(extern_quoted_printable::encode_to_str(data))
}

/// Decodes quoted-printable data as used by the content transfer encoding.
///
/// The input is parsed with `ParseMode::Strict`. Any error reported by the
/// decoder is wrapped with the `EncodingErrorKind::Malformed` context and
/// converted into an `EncodingError`.
#[inline]
pub fn normal_decode<R: AsRef<[u8]>>(input: R)
    -> Result<Vec<u8>, EncodingError>
{
    let raw = input.as_ref();
    match extern_quoted_printable::decode(raw, extern_quoted_printable::ParseMode::Strict) {
        Ok(bytes) => Ok(bytes),
        Err(err) => Err(err.context(EncodingErrorKind::Malformed).into()),
    }
}

/// Decodes the quoted-printable encoded text of an encoded word.
///
/// There is no dedicated decoder for encoded words: the input is handed
/// unchanged to `normal_decode` and its result is returned as is.
///
/// NOTE(review): RFC 2047 (section 4.2) defines `_` inside a Q-encoded word
/// as a space, but the plain decoder used here leaves `_` untouched;
/// confirm whether callers rely on that.
#[inline(always)]
pub fn encoded_word_decode<R: AsRef<[u8]>>( input: R ) -> Result<Vec<u8>, EncodingError> {
    // the standard quoted printable decoding is reused as is
    normal_decode::<R>( input )
}

//FIXME we don't use the EncodedWord context here,
// instead we use the most restrictive context as a basis,
// making it compatible with all contexts, but this is not
// necessarily the best solution...
/// Simple wrapper around `encoded_word_encode` for utf8 strings only.
///
/// Every character of `word` is passed on as one chunk, i.e. as the slice
/// of bytes which encodes that character. Because `encoded_word_encode`
/// only splits between chunks, a character is never torn apart when the
/// output is continued in another encoded word.
pub fn encoded_word_encode_utf8<'a,  O>(word: &str, writer: &mut O )
    where O: EncodedWordWriter
{
    let bytes = word.as_bytes();
    let chunks = word
        .char_indices()
        .map(move |(start, ch)| &bytes[start..start + ch.len_utf8()]);
    encoded_word_encode(chunks, writer);
}

///
/// Quoted Printable ("Q") encoding for Encoded Words in MIME-Headers
///
/// Which means:
/// 1. there is a limit to the maximum number of characters
///    - the limit is 75 INCLUDING the `=?charset?encoding?...?=` overhead
///    - as such the line length limit of quoted printable can not be hit,
///      the quoted printable part is at most 67 chars long, e.g. for utf8
///      it is at most 64 chars
/// 2. has to be one token, so no ' ','\t' and neither soft nor hard newlines
/// 3. no '?' character
/// 4. a '_' in the encoded text always stands for a space (RFC 2047,
///    section 4.2), so an underscore in the input has to be written as `=5F`
///
/// The input is a sequence of bytes split up in chunks where
/// a split into multiple encoded words can be done between any
/// two chunks but not in a chunk. Wrt. utf8 a chunk would
/// correspond to a character, e.g. `[97]` for `'a'` and
/// `[0xe2, 0x99, 0xa5]` for a `'♥'`.
///
/// The encoded form of a chunk is collected in a buffer of 16 chars, so
/// only chunks of up to 5 bytes (at most 15 encoded chars) are guaranteed
/// to work, bigger chunks can trigger a panic.
///
/// As this has to be safe for usage in all header contexts only ascii
/// letters, ascii digits and `'!'`, `'*'`, `'+'`, `'-'`, `'/'` are written
/// as they are. Every other byte is written as `=XX` (upper case hex), this
/// includes ' ', '\t', '?', '(', ')', '=', '_' and also '\n','\r', see the
/// note below for more details.
///
/// The writer `out` is driven as follows: `write_ecw_start` once at the
/// beginning, `write_char` for every char of the output,
/// `start_next_encoded_word` whenever the current encoded word can not take
/// the next chunk and `write_ecw_end` once at the end.
///
/// # Panics:
///
/// 1. if the encoded size of a chunk is more than 16 chars, which can
///    happen if a chunk has more than 5 bytes. For comparison utf8 has
///    at most chunks with 4 bytes leading to at most 12 chars buffer usage.
///
/// 2. if a single encoded chunk can not be written as one because of
///    the length limitation AFTER a new encoded word was started, i.e.
///    if it is longer than `out.max_payload_len()`.
///
/// The value returned by `out.max_payload_len()` is NOT validated. No new
/// line handling is implemented and the max size for this use case can be
/// at most 67 chars, so a writer reporting more leads to invalid output.
///
/// # Note:
///   As the output has to be a token no new line characters can appear in
///   it. Besides that q-encoding forbids encoding CRLF line breaks of TEXT
///   bodies (so that the line length limits are not messed up), while they
///   are allowed to appear in non TEXT data. This function should be used
///   with text data not containing any new line character, but it is not
///   limited to it. For now any appearance of '\r' or '\n' will be encoded
///   like any other "special" byte, for the future a context might be needed.
///   (Especially as encoded words can contain non-ascii text in which '\r','\n'
///   might be encoded with completely different bytes, but when the RFC speaks of
///   '\r','\n' it normally means the bytes 13/10 independent of the character set,
///   or if they appear in an image, zip-archive etc. )
pub fn encoded_word_encode<'a, I, O>(input: I, out: &mut O )
    where I: Iterator<Item=&'a [u8]>, O: EncodedWordWriter
{
    out.write_ecw_start();
    let max_payload_len = out.max_payload_len();
    let mut remaining = max_payload_len;
    //WARN: nothing here guards against `max_payload_len` being > 67
    // Scratch space for the encoded form of the current chunk. A chunk is
    // encoded completely before anything is written, so that it can be moved
    // into the next encoded word as a whole.
    let mut buf = [SoftAsciiChar::from_unchecked('X'); 16];

    for chunk in input {
        let mut buf_idx = 0;

        for &byte in chunk {
            // this is the way to go as long as we don't want to behave differently for
            // different context, the COMMENT context allows more chars, and the
            // TEXT context even more
            //
            // '_' is intentionally NOT part of the literal set: in a Q-encoded
            // word it means "space" and would be decoded as such by others.
            let is_literal = match byte {
                b'!' | b'*' |
                b'+' | b'-' |
                b'/' |
                b'0'..=b'9' |
                b'A'..=b'Z' |
                b'a'..=b'z' => true,
                _ => false
            };
            let needed = if is_literal { 1 } else { 3 };
            if buf_idx + needed > buf.len() {
                panic!( "encoded chunk longer than the chunk buffer ({:?})", buf.len() );
            }

            if is_literal {
                buf[buf_idx] = SoftAsciiChar::from_unchecked(byte as char);
            } else {
                buf[buf_idx] = SoftAsciiChar::from_unchecked('=');
                buf[buf_idx+1] = lower_nibble_to_hex( byte >> 4 );
                buf[buf_idx+2] = lower_nibble_to_hex( byte );
            }
            buf_idx += needed;
        }

        // the chunk does not fit into the current encoded word, continue in a new one
        if buf_idx > remaining {
            out.start_next_encoded_word();
            remaining = max_payload_len;
        }
        // it does not even fit into an empty encoded word
        if buf_idx > remaining {
            panic!( "single character longer then max length ({:?}) of encoded word", remaining );
        }
        for ch in &buf[..buf_idx] {
            out.write_char( *ch );
        }
        remaining -= buf_idx;
    }
    out.write_ecw_end()
}

/// Returns the upper case hex digit for the lower 4 bits of `half_byte`.
///
/// The upper 4 bits are ignored, to get the digit for them the byte has
/// to be shifted first (`byte >> 4`).
#[inline]
fn lower_nibble_to_hex( half_byte: u8 ) -> SoftAsciiChar {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    // masking keeps the index in 0..16, so the lookup can not fail
    SoftAsciiChar::from_unchecked(HEX_DIGITS[ (half_byte & 0x0F) as usize ] as char)
}




#[cfg(test)]
mod test {
    use soft_ascii_string::SoftAsciiStr;
    use ::bind::encoded_word::EncodedWordEncoding;
    use super::super::encoded_word::VecWriter;
    use super::*;

    /// Only the lower 4 bits of the input must influence the hex digit.
    #[test]
    fn to_hex() {
        let data = &[
            ('0', 0b11110000),
            ('0', 0b0 ),
            ('7', 0b0111),
            ('7', 0b10111),
            ('F',  0b1111)
        ];
        for &(ch, byte) in data {
            assert_eq!( lower_nibble_to_hex( byte), ch );
        }

    }

    /// Generates a test named `$name` which encodes the string `$data` with
    /// `encoded_word_encode_utf8` into a `VecWriter` (charset `utf8`,
    /// quoted printable encoding) and compares the items of `out.data()`
    /// with the expected encoded words `$item`, first pairwise and then by
    /// the number of items.
    macro_rules! test_ecw_encode {
        ($name:ident, data $data:expr => [$($item:expr),*]) => {
            #[test]
            fn $name() {
                let test_data = $data;
                let mut out = VecWriter::new(
                    SoftAsciiStr::from_unchecked("utf8"),
                    EncodedWordEncoding::QuotedPrintable
                );

                encoded_word_encode_utf8( test_data, &mut out );

                let expected = &[
                    $($item),*
                ];
                let iter = expected.iter()
                    .zip( out.data().iter().map(|x|x.as_str()) )
                    .enumerate();

                for ( idx, (expected, got) ) in iter {
                    if  *expected != got {
                        panic!( " item nr {}: {:?} != {:?} ", idx, expected, got );
                    }
                }

                // `zip` stops at the shorter sequence, so the lengths are checked separately
                let e_len = expected.len();
                let g_len = out.data().len();
                if e_len > g_len {
                    panic!( "expected following additional items: {:?}", &expected[g_len..e_len])
                }
                if e_len < g_len {
                    panic!( "got following additional items: {:?}", &out.data()[e_len..g_len])
                }
            }
        };
    }

    test_ecw_encode! { can_be_used_in_comments,
        data "()\"" => [
            "=?utf8?Q?=28=29=22?="
        ]
    }

    test_ecw_encode! { can_be_used_in_phrase,
        data "{}~@#$%^&*()=|\\[]';:." => [
            "=?utf8?Q?=7B=7D=7E=40=23=24=25=5E=26*=28=29=3D=7C=5C=5B=5D=27=3B=3A=2E?="
        ]
    }

    test_ecw_encode! { bad_chars_in_all_contexts,
        data "?= \t\r\n" => [
            "=?utf8?Q?=3F=3D=20=09=0D=0A?="
        ]
    }

    // a literal '_' would be read as a space by other decoders
    test_ecw_encode! { underscore_is_escaped,
        data "a_b" => [
            "=?utf8?Q?a=5Fb?="
        ]
    }

    test_ecw_encode!{ encode_ascii,
        data  "abcdefghijklmnopqrstuvwxyz \t?=0123456789!@#$%^&*()_+-" => [
             "=?utf8?Q?abcdefghijklmnopqrstuvwxyz=20=09=3F=3D0123456789!=40=23=24=25=5E?=",
             "=?utf8?Q?=26*=28=29=5F+-?="
        ]
    }

    test_ecw_encode! { how_it_handales_newlines,
        data "\r\n" => [
            "=?utf8?Q?=0D=0A?="
        ]
    }


    // input which does not fit into one encoded word is continued in a second one
    test_ecw_encode! { split_into_multiple_ecws,
        data "0123456789012345678901234567890123456789012345678901234567891234newline" => [
            "=?utf8?Q?0123456789012345678901234567890123456789012345678901234567891234?=",
            "=?utf8?Q?newline?="
        ]
    }

    // multi byte characters must only be split between characters
    test_ecw_encode!{ bigger_chunks,
        data "ランダムテキスト ראַנדאָם טעקסט" => [
            //ランダムテキス
            "=?utf8?Q?=E3=83=A9=E3=83=B3=E3=83=80=E3=83=A0=E3=83=86=E3=82=AD=E3=82=B9?=",
            //ト ראַנדאָם
            "=?utf8?Q?=E3=83=88=20=D7=A8=D7=90=D6=B7=D7=A0=D7=93=D7=90=D6=B8=D7=9D=20?=",
            //טעקסט
            "=?utf8?Q?=D7=98=D7=A2=D7=A7=D7=A1=D7=98?="
        ]
    }

    /// Decodes the payloads of encoded words (without the `=?utf8?Q?`/`?=`
    /// framing) and compares the result with the expected text.
    #[test]
    fn ecw_decode() {
        let pairs = [
            ("=28=29=22", "()\""),
            (
                "=7B=7D=7E=40=23=24=25=5E=26*=28=29=3D=7C=5C=5B=5D=27=3B=3A=2E",
                "{}~@#$%^&*()=|\\[]';:."
            ),
            (
                "=3F=3D=20=09=0D=0A",
                "?= \t\r\n"
            ),
            // NOTE(review): pins that the decoder keeps a literal '_' as it is,
            // RFC 2047 would have it decoded as a space
            (
                "=26*=28=29_+-",
                "&*()_+-"
            ),
            // the form `encoded_word_encode` produces for an underscore
            (
                "=26*=28=29=5F+-",
                "&*()_+-"
            ),
            (
                "abcdefghijklmnopqrstuvwxyz=20=09=3F=3D0123456789!=40=23=24=25=5E",
                "abcdefghijklmnopqrstuvwxyz \t?=0123456789!@#$%^"
            ),
            (
                "=0D=0A",
                "\r\n"
            ),
            (
                "=E3=83=A9=E3=83=B3=E3=83=80=E3=83=A0=E3=83=86=E3=82=AD=E3=82=B9",
                "ランダムテキス"
            ),
            (
                "=E3=83=88=20=D7=A8=D7=90=D6=B7=D7=A0=D7=93=D7=90=D6=B8=D7=9D=20",
                "ト ראַנדאָם "
            ),
            (
                "=D7=98=D7=A2=D7=A7=D7=A1=D7=98",
                "טעקסט"
            )
        ];
        for &(inp, outp) in pairs.iter() {
            let dec = assert_ok!(encoded_word_decode(inp));
            let dec = String::from_utf8(dec).unwrap();
            assert_eq!(
                outp.as_bytes(),
                dec.as_bytes()
            );
        }
    }

    /// The content transfer encoding has to insert a soft line break into
    /// the long first line and has to encode the non-ascii characters.
    #[test]
    fn normal_encode_text() {
        let text = concat!(
            "This is a llllllllllllllllllllllllllllllllllllll00000000000000000000ng test    0123456789qwertyuio\r\n",
            "With many lines\r\n",
            "And utf→→→→8"
        );
        let encoded = normal_encode(text);
        assert_eq!(
            concat!(
                "This is a llllllllllllllllllllllllllllllllllllll00000000000000000000ng test=\r\n",
                "    0123456789qwertyuio\r\n",
                "With many lines\r\n",
                "And utf=E2=86=92=E2=86=92=E2=86=92=E2=86=928"
            ),
            encoded.as_str()
        );
    }

    /// Reverse direction of `normal_encode_text`: the soft line break is
    /// removed and the `=XX` sequences are turned back into bytes.
    #[test]
    fn normal_decode_text() {
        let text = concat!(
                "This is a llllllllllllllllllllllllllllllllllllll00000000000000000000ng test=\r\n",
                "    0123456789qwertyuio\r\n",
                "With many lines\r\n",
                "And utf=E2=86=92=E2=86=92=E2=86=92=E2=86=928"
            );
        let encoded = String::from_utf8(normal_decode(text).unwrap()).unwrap();
        assert_eq!(
            concat!(
                "This is a llllllllllllllllllllllllllllllllllllll00000000000000000000ng test    0123456789qwertyuio\r\n",
                "With many lines\r\n",
                "And utf→→→→8"
            ),
            encoded.as_str()
        );
    }
}