1 | # -*- coding: utf-8 -*-
2 | # $Id: timezoneinfo-gen.py 76553 2019-01-01 01:45:53Z vboxsync $
3 |
4 | """
5 | Generates timezone mapping info from public domain tz data and
6 | simple windows tables.
7 | """
8 | from __future__ import print_function;
9 |
10 | __copyright__ = \
11 | """
12 | Copyright (C) 2017-2019 Oracle Corporation
13 |
14 | This file is part of VirtualBox Open Source Edition (OSE), as
15 | available from http://www.alldomusa.eu.org. This file is free software;
16 | you can redistribute it and/or modify it under the terms of the GNU
17 | General Public License (GPL) as published by the Free Software
18 | Foundation, in version 2 as it comes in the "COPYING" file of the
19 | VirtualBox OSE distribution. VirtualBox OSE is distributed in the
20 | hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
21 |
22 | The contents of this file may alternatively be used under the terms
23 | of the Common Development and Distribution License Version 1.0
24 | (CDDL) only, as it comes in the "COPYING.CDDL" file of the
25 | VirtualBox OSE distribution, in which case the provisions of the
26 | CDDL are applicable instead of those of the GPL.
27 |
28 | You may elect to license modified versions of this file under the
29 | terms and conditions of either the GPL or the CDDL or both.
30 | """
31 | __version__ = "$Revision: 76553 $"
32 |
33 | import os;
34 | import sys;
35 | import xml.etree.ElementTree as ElementTree;
36 |
37 |
class TzWinZoneEntry(object):
    """
    Base class holding the Windows side of a time zone mapping.

    All attributes start out in the 'not mapped' state.
    """
    def __init__(self):
        # Windows zone name and the territory it was mapped for (None = no mapping).
        self.sWinName = self.sWinTerritory = None;
        # Windows time zone index; 0 until one has been assigned.
        self.idxWin = 0;
        # Golden flag; False until set by the mapping code.
        self.fWinGolden = False;
44 |
class TzLinkEntry(TzWinZoneEntry):
    """
    A link entry: an alternative name (sLinkNm) referring to another zone (sTarget).

    Carries its own Windows mapping state via TzWinZoneEntry.
    """
    def __init__(self, sLinkNm, sTarget):
        super(TzLinkEntry, self).__init__();
        self.sTarget = sTarget;     # Name of the zone the link points at.
        self.sLinkNm = sLinkNm;     # The link (alias) name itself.
50 |
class TzZoneOffset(object):
    """
    One offset line of a zone, built from the whitespace separated fields.

    asFields[0] is mandatory (the offset expression).  Fields 1 thru 3 (rules,
    format, until) are optional; a missing field, '-' or '' is stored as None.
    Only the first word of the 'until' column (asFields[3]) is kept.
    """
    def __init__(self, asFields):
        def optionalField(iField):
            """ Returns field iField, or None if it is absent or a placeholder. """
            if iField < len(asFields) and asFields[iField] not in ( '-', '' ):
                return asFields[iField];
            return None;

        self.sOffset = asFields[0]; # GMT offset expression
        self.sRules  = optionalField(1);
        self.sFormat = optionalField(2);
        self.sUntil  = optionalField(3);
57 |
class TzZoneEntry(TzWinZoneEntry):
    """
    A named zone with its territory code and list of offset lines, plus the
    Windows mapping state inherited from TzWinZoneEntry.
    """
    def __init__(self, sName):
        super(TzZoneEntry, self).__init__();
        self.aOffsets   = [];   # type: list(TzZoneOffset)
        self.sTerritory = 'ZZ'; # Initial territory code.
        self.sName      = sName;
64 |
class TzZoneRule(object):
    """
    One 'Rule' line: NAME FROM TO TYPE IN ON AT SAVE LETTER/S.

    sName is stored as given.  Every other column is stored as given unless it
    is the placeholder '-' or the empty string, in which case None is stored.
    """
    def __init__(self, sName, sFrom, sTo, sType, sIn, sOn, sAt, sSave, sLetter):
        def normalize(sValue):
            """ Maps the '-' and '' placeholders to None. """
            return sValue if sValue not in ( '-', '' ) else None;

        self.sName   = sName;
        self.sFrom   = normalize(sFrom);
        self.sTo     = normalize(sTo);      # Was filtered on sFrom by mistake.
        self.sType   = normalize(sType);
        self.sIn     = normalize(sIn);
        self.sOn     = normalize(sOn);      # Was accepted but never stored.
        self.sAt     = normalize(sAt);
        self.sSave   = normalize(sSave);
        self.sLetter = normalize(sLetter);
75 |
def info(sMsg):
    """
    Writes sMsg to stderr as one line, prefixed with 'info: '.
    """
    sText = 'info: ' + sMsg;
    print(sText, file=sys.stderr);
81 |
def warning(sMsg):
    """
    Writes sMsg to stderr as one line, prefixed with 'warning: '.
    """
    sText = 'warning: ' + sMsg;
    print(sText, file=sys.stderr);
87 |
def error(sMsg):
    """
    Outputs an error message (to stderr), prefixed with 'error: '.
    """
    print('error: ' + sMsg, file=sys.stderr);
93 |
def readTzDataFile(sFile):
    """
    Reads the given data file into memory, stripping comments.

    Returns a list with one string per input line.  Everything from the first
    '#' on a line is dropped and trailing whitespace (including the newline)
    is removed; leading whitespace is kept.  Lines are never removed, so list
    index + 1 is the line number in the file.

    Exceptions from opening or reading the file are passed up to the caller.
    """
    # The with-statement makes sure the file is closed even if reading fails.
    with open(sFile, 'r') as oInFile:
        asLines = oInFile.readlines();
    return [sLine.split('#', 1)[0].rstrip() for sLine in asLines];
108 |
#
# tzdata structures.  Filled in by readTzData().
#
g_dZones = dict();  # Zone name -> TzZoneEntry.
g_dRules = dict();  # Rule entries (lists of TzZoneRule).
g_dLinks = dict();  # Link name -> TzLinkEntry.
115 |
def readTzData(sTzDataDir):
    """
    Reads in the bits we want from tz data.  Assumes 2017b edition.

    Parses the Zone, Rule and Link lines of the tzdata source files found in
    sTzDataDir into g_dZones, g_dRules (rule name -> list of TzZoneRule) and
    g_dLinks, then uses 'zone.tab' to assign a territory to each zone.

    Returns 0 on success.  On a malformed line the problem is reported via
    error() and info() and 1 is returned.  Exceptions from reading the files
    themselves are passed up to the caller.
    """

    #
    # Parse the tzdata files.
    #
    for sFile in [ 'africa', 'antarctica', 'asia', 'australasia', 'europe', 'northamerica', 'southamerica',
                   'pacificnew', 'etcetera', 'backward', 'systemv', 'factory', #'backzone'
                   ]:
        sIn = 'none';
        asLines = readTzDataFile(os.path.join(sTzDataDir, sFile));
        for iLine, sLine in enumerate(asLines):
            if not sLine.strip():
                continue; # Blank line or comment only.
            asFields = sLine.split();
            try:
                if sLine.startswith('Zone'): # 'Zone' NAME STDOFF RULES FORMAT [UNTIL]
                    sIn = 'Zone';
                    oZone = TzZoneEntry(asFields[1]);
                    if oZone.sName in g_dZones: raise Exception('duplicate: %s' % (oZone.sName,));
                    g_dZones[oZone.sName] = oZone;
                    oZone.aOffsets.append(TzZoneOffset(asFields[2:]));
                elif sLine.startswith('Rule'): # 'Rule' NAME FROM TO TYPE IN ON AT SAVE LETTER/S
                    oRule = TzZoneRule(asFields[1], asFields[2], asFields[3], asFields[4], asFields[5],
                                       asFields[6], asFields[7], asFields[8], asFields[9]);
                    # Group the rules by name.  (Keying on the rule object itself
                    # would give every rule line its own entry.)
                    g_dRules.setdefault(oRule.sName, []).append(oRule);
                elif sLine.startswith('Link'):
                    if len(asFields) != 3: raise Exception("malformed link: len(asFields) = %d" % (len(asFields)));
                    oLink = TzLinkEntry(asFields[2].strip(), asFields[1].strip());
                    if oLink.sLinkNm not in g_dLinks:
                        g_dLinks[oLink.sLinkNm] = oLink;
                    elif g_dLinks[oLink.sLinkNm].sTarget != oLink.sTarget:
                        warning('duplicate link for %s: new target %s, previous %s'
                                % (oLink.sLinkNm, oLink.sTarget, g_dLinks[oLink.sLinkNm].sTarget,));
                elif sIn == 'Zone':
                    # Zone continuation line: STDOFF RULES FORMAT [UNTIL], i.e. the
                    # same columns as a Zone line minus the 'Zone NAME' prefix.
                    oZone.aOffsets.append(TzZoneOffset(asFields));
                else:
                    raise Exception('what is this?')
            except Exception as oXcpt:
                error("line %u in %s: '%s'" % (iLine + 1, sFile, type(oXcpt) if not str(oXcpt) else str(oXcpt),));
                info("'%s'" % (asLines[iLine],));
                return 1;

    #
    # Process the country <-> zone mapping file.
    #
    asLines = readTzDataFile(os.path.join(sTzDataDir, 'zone.tab'));
    for iLine, sLine in enumerate(asLines):
        if not sLine or sLine[0] == ' ':
            continue;
        asFields = sLine.split('\t');
        try:
            sTerritory = asFields[0];
            if len(sTerritory) != 2: raise Exception('malformed country: %s' % (sTerritory,));
            sZone = asFields[2];
            oZone = g_dZones.get(sZone);
            if oZone:
                # 'ZZ' is the initial value and counts as not yet assigned.
                if oZone.sTerritory and oZone.sTerritory != 'ZZ':
                    raise Exception('zone %s already have country %s associated with it (setting %s)'
                                    % (sZone, oZone.sTerritory, sTerritory));
                oZone.sTerritory = sTerritory;
            elif g_dLinks.get(sZone):
                pass; # ignore country<->link associations for now.
            else:
                raise Exception('country zone not found: %s' % (sZone,));

        except Exception as oXcpt:
            error("line %u in %s: '%s'" % (iLine + 1, 'zone.tab', type(oXcpt) if not str(oXcpt) else str(oXcpt),));
            info("'%s'" % (asLines[iLine],));
            return 1;
    return 0
198 |
199 |
def readWindowsToTzMap(sMapXml):
    """
    Reads the 'common/supplemental/windowsZones.xml' file from http://cldr.unicode.org/.

    For each mapZone element the windows name ('other') and territory are
    recorded on every zone, or failing that link, listed in its 'type'
    attribute.  A warning is issued for unknown names and for entries that
    already carry a different mapping.

    Always returns 0.
    """
    oRoot = ElementTree.parse(sMapXml).getroot();
    oMapTimezones = oRoot.find('windowsZones').find('mapTimezones');
    # <mapZone other="Line Islands Standard Time" territory="001" type="Pacific/Kiritimati"/>
    for oMapZone in oMapTimezones.findall('mapZone'):
        sTerritory = oMapZone.attrib['territory'];
        sWinZone   = oMapZone.attrib['other'];
        for sZone in oMapZone.attrib['type'].split():
            # Real zones take precedence over links with the same name.
            oEntry  = g_dZones.get(sZone);
            fIsZone = bool(oEntry);
            if not fIsZone:
                oEntry = g_dLinks.get(sZone);
            if not oEntry:
                warning('could not find zone "%s" (for mapping win zone "%s" to) - got the right data sets?'
                        % (sZone, sWinZone));
                continue;

            # Accept the mapping if there is none yet, or if it merely restates
            # the '001' mapping of the same windows zone.
            fUnmapped      = oEntry.sWinName is None;
            fSameAs001Zone = oEntry.sWinTerritory == '001' and oEntry.sWinName == sWinZone;
            if fUnmapped or fSameAs001Zone:
                oEntry.sWinName      = sWinZone;
                oEntry.sWinTerritory = sTerritory;
                if sTerritory == '001':
                    oEntry.fWinGolden = True;
                continue;

            if fIsZone:
                sFmt = 'zone "%s" have more than one windows mapping: %s (%s) and now %s (%s)';
            else:
                sFmt = 'zone-link "%s" have more than one windows mapping: %s (%s) and now %s (%s)';
            warning(sFmt % (sZone, oEntry.sWinName, oEntry.sWinTerritory, sWinZone, sTerritory));
    return 0;
237 |
238 |
def readWindowsIndexes(sFile):
    """
    Reads the windows time zone index from the table in the given file and sets idxWin.

    Assumes format: index{tab}name{tab}(GMT{offset}){space}{cities}

    For instance: https://support.microsoft.com/en-gb/help/973627/microsoft-time-zone-index-values

    The first line must start with 'Index'; if it also contains 'hex' the
    index column is parsed as hexadecimal, otherwise as decimal.

    Returns 0 on success, 1 if the header is missing/wrong or a line cannot be
    parsed (reported via error() and info()).  Exceptions from opening or
    reading the file are passed up to the caller.
    """
    # Read the file (closed again even if reading fails).
    with open(sFile, "r") as oInFile:
        asLines = oInFile.readlines();

    # Check the header.  An empty file has no header either.
    if not asLines or not asLines[0].startswith('Index'):
        error('expected first line of "%s" to start with "Index"' % (sFile,));
        return 1;
    fHexIndex = asLines[0].find('hex') > 0;

    # Abbreviation prefixes (lower case) and what they are rewritten to.
    # Only the first matching prefix is applied.
    atPrefixes = (
        ( 'a.u.s.',        'aus' ),
        ( 'u.s. ',         'us ' ),
        ( 's.a. ',         'sa ' ),
        ( 's.e. ',         'se ' ),
        ( 'pacific s.a. ', 'pacific sa ' ),
    );

    for iLine in range(1, len(asLines)):
        # Parse.
        asFields = asLines[iLine].split('\t');
        try:
            idxWin     = int(asFields[0].strip(), 16 if fHexIndex else 10);
            sWinName   = asFields[1].strip();
            sLocations = ' '.join(asFields[2].split());
            if sWinName.find('(GMT') >= 0:          raise Exception("oops #1");
            if not sLocations.startswith('(GMT'):   raise Exception("oops #2");
            # Drop the leading '(GMT...)' part, keeping only the cities.
            sLocations = sLocations[sLocations.find(')') + 1 : ].strip();
        except Exception as oXcpt:
            error("line %u in %s: '%s'" % (iLine + 1, sFile, type(oXcpt) if not str(oXcpt) else str(oXcpt),));
            info("'%s'" % (asLines[iLine],));
            return 1;

        # Some name adjustments.
        sWinName = sWinName.lower();
        for sPrefix, sReplacement in atPrefixes:
            if sWinName.startswith(sPrefix):
                sWinName = sReplacement + sWinName[len(sPrefix):];
                break;

        # Update zone entries with matching windows names.
        # NOTE(review): only g_dZones entries are updated here; g_dLinks entries
        #               are not touched - confirm that this is intentional.
        cUpdates = 0;
        for oZone in g_dZones.values():
            if oZone.sWinName and oZone.sWinName.lower() == sWinName:
                oZone.idxWin = idxWin;
                cUpdates += 1;
        if cUpdates == 0:
            warning('No matching zone found for index zone "%s" (%#x, %s)' % (sWinName, idxWin, sLocations));

    return 0;
301 |
def getPadding(sField, cchWidth):
    """
    Returns the spaces needed to pad sField out to cchWidth characters; an
    empty string if sField is already that wide or wider.
    """
    cchMissing = cchWidth - len(sField);
    return ' ' * cchMissing if cchMissing > 0 else '';
307 |
def formatFields(sName, oZone, oWinZone):
    """
    Formats the table fields for one entry.

    sName is the unix name to emit, oZone supplies sTerritory, and oWinZone
    supplies sWinName, sWinTerritory, idxWin and fWinGolden.

    Returns a list of eight strings, in this order:
        unix name (quoted), windows name (quoted or NULL), unix name length,
        windows name length (0 if none), territory (quoted), windows
        territory (quoted, "" if none), windows index (hex), flags (or 0).
    """
    sWinName      = oWinZone.sWinName;
    sWinTerritory = oWinZone.sWinTerritory;

    asFields = [
        '"%s"' % sName,
        '"%s"' % sWinName if sWinName else 'NULL',
        '%u' % (len(sName),),
        '%u' % (len(sWinName),) if sWinName else '0',
        '"%s"' % (oZone.sTerritory,),
        '"%s"' % (sWinTerritory,) if sWinTerritory else '""',
        '%#010x' % (oWinZone.idxWin,),
    ];

    # The flags field: set flags OR'ed together, or plain 0 when none are set.
    asFlags = [ 'RTTIMEZONEINFO_F_GOLDEN', ] if oWinZone.fWinGolden else [];
    asFields.append(' | '.join(asFlags) if asFlags else '0');
    return asFields;
348 |
def produceCode(oDst):
    """
    Produces the tables.

    Writes two C tables to oDst (any object with a write() method):
      - g_aTimeZones: one entry per zone in g_dZones and per link in g_dLinks,
        sorted on the formatted fields (unix name first).
      - g_aidxWinTimeZones: indexes into g_aTimeZones for all entries having a
        windows name, sorted by windows name, golden flag and territory.

    Returns 0.  Raises KeyError if a link's target is not found in g_dZones.
    """

    #
    # Produce the info table.
    #
    aasEntries = [];

    # The straight zones.
    for sZone in g_dZones:
        aasEntries.append(formatFields(sZone, g_dZones[sZone], g_dZones[sZone]));

    # The links.  Territory comes from the target zone, the windows info from the link.
    for sZone in g_dLinks:
        oLink = g_dLinks[sZone];
        aasEntries.append(formatFields(sZone, g_dZones[oLink.sTarget], oLink));

    # Figure field lengths.
    acchFields = [ 2, 2, 2, 2, 4, 4, 10, 1 ];
    for asFields in aasEntries:
        assert len(asFields) == len(acchFields);
        for iField, sField in enumerate(asFields):
            if len(sField) > acchFields[iField]:
                acchFields[iField] = len(sField);

    # Sort the data on zone name.
    aasEntries.sort();

    # Do the formatting.
    oDst.write('/**\n'
               ' * Static time zone mapping info. Sorted by pszUnixName.\n'
               ' */\n'
               'static const RTTIMEZONEINFO g_aTimeZones[] =\n'
               '{\n');
    for iEntry, asFields in enumerate(aasEntries):
        sLine = ' { ';
        for iField, sField in enumerate(asFields):
            sLine += sField;
            sLine += ', ';
            sLine += getPadding(sField, acchFields[iField]);
        sLine += ' }, /* %#05x */\n' % (iEntry,);
        oDst.write(sLine);
    oDst.write('};\n'
               '\n');

    #
    # Now produce a lookup table for windows time zone names, with indexes into
    # the g_aTimeZone table.
    #
    aasLookup = [];
    for iEntry, asFields in enumerate(aasEntries):
        if asFields[1] != 'NULL':
            aasLookup.append([ asFields[1], # sWinName
                               -1 if asFields[7].find('RTTIMEZONEINFO_F_GOLDEN') >= 0 else 1, # golden entries sort first
                               asFields[5], # sWinTerritory
                               iEntry,
                               asFields[0]]); # sZone
    aasLookup.sort();

    oDst.write('/**\n'
               ' * Windows time zone lookup table. Sorted by name, golden flag and territory.\n'
               ' */\n'
               'static const uint16_t g_aidxWinTimeZones[] = \n'
               '{\n');
    for asFields in aasLookup:
        # The fields are C string literals, so [1:-1] strips the double quotes.
        sWinTerritory = asFields[2][1:-1];
        sLine  = ' %#05x, /* %s' % (asFields[3], asFields[0][1:-1]);
        sLine += getPadding(asFields[0], acchFields[1]);
        sLine += ' / %s%s' % (sWinTerritory, '+' if asFields[1] < 0 else ' ');
        # Align two letter territories with the three character '001'.  The
        # length must be taken without the quotes or this never triggers.
        if len(sWinTerritory) == 2:
            sLine += ' ';
        sLine += ' ==> %s */\n' % (asFields[4][1:-1],)
        oDst.write(sLine);

    oDst.write('};\n'
               '\n');

    return 0;
430 |
431 |
def main(asArgs):
    """
    C-like main function.

    asArgs is the argument vector including the program name:
        asArgs[1] - the tz data directory (passed to readTzData),
        asArgs[2] - the windows zone mapping XML (passed to readWindowsToTzMap),
        asArgs[3] - the windows index table (passed to readWindowsIndexes).

    The generated C tables are written to stdout.
    Returns 0 on success, non-zero on failure.
    """
    if len(asArgs) != 4:
        # List the arguments in the order they are actually consumed below.
        error("Takes exactly three arguments: <tz-data-dir> <windows-zones-xml> <ms-index-file>");
        return 1;
    sTzDataDir     = asArgs[1];
    sWinToTzMap    = asArgs[2];
    sWinIndexTable = asArgs[3];

    #
    # Read in the data first.
    #
    iRc = readTzData(sTzDataDir);
    if iRc == 0:
        iRc = readWindowsToTzMap(sWinToTzMap);
    if iRc == 0:
        iRc = readWindowsIndexes(sWinIndexTable);
    if iRc == 0:
        #
        # Produce the C table.
        #
        iRc = produceCode(sys.stdout);
    return iRc;
457 |
# Script entry point: run main() with the command line and use its return
# value as the process exit code.
if __name__ == '__main__':
    sys.exit(main(sys.argv));
460 |