VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/time/timezoneinfo-gen.py@ 93943

Last change on this file since 93943 was 93115, checked in by vboxsync, 3 years ago

scm --update-copyright-year

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 16.9 KB
Line 
1# -*- coding: utf-8 -*-
2# $Id: timezoneinfo-gen.py 93115 2022-01-01 11:31:46Z vboxsync $
3
4"""
5Generates timezone mapping info from public domain tz data and
6simple windows tables.
7"""
8from __future__ import print_function;
9
10__copyright__ = \
11"""
12Copyright (C) 2017-2022 Oracle Corporation
13
14This file is part of VirtualBox Open Source Edition (OSE), as
15available from http://www.virtualbox.org. This file is free software;
16you can redistribute it and/or modify it under the terms of the GNU
17General Public License (GPL) as published by the Free Software
18Foundation, in version 2 as it comes in the "COPYING" file of the
19VirtualBox OSE distribution. VirtualBox OSE is distributed in the
20hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
21
22The contents of this file may alternatively be used under the terms
23of the Common Development and Distribution License Version 1.0
24(CDDL) only, as it comes in the "COPYING.CDDL" file of the
25VirtualBox OSE distribution, in which case the provisions of the
26CDDL are applicable instead of those of the GPL.
27
28You may elect to license modified versions of this file under the
29terms and conditions of either the GPL or the CDDL or both.
30"""
31__version__ = "$Revision: 93115 $"
32
33import os;
34import sys;
35import xml.etree.ElementTree as ElementTree;
36
37
class TzWinZoneEntry(object):
    """
    Base class holding the Windows side of a time zone mapping.

    All attributes start out in their "no mapping" state and are filled in
    later by the code reading the Windows mapping tables.
    """

    def __init__(self):
        # Windows time zone name; None until a mapping has been assigned.
        self.sWinName = None
        # Territory code that came with the Windows mapping; None until assigned.
        self.sWinTerritory = None
        # True once the mapping has been marked as the "golden" one.
        self.fWinGolden = False
        # Windows time zone index; 0 until assigned.
        self.idxWin = 0
44
class TzLinkEntry(TzWinZoneEntry):
    """
    A tzdata 'Link' entry: an alternative name (sLinkNm) for an existing
    zone (sTarget), carrying its own Windows mapping attributes.
    """

    def __init__(self, sLinkNm, sTarget):
        super(TzLinkEntry, self).__init__()
        self.sTarget = sTarget      # Name of the zone the link points at.
        self.sLinkNm = sLinkNm      # The alias name itself.
50
class TzZoneOffset(object):
    """
    One offset record of a zone, built from the whitespace separated fields
    following the zone name (or from a continuation line).

    asFields[0] is required and stored verbatim as the offset expression.
    Fields 1 thru 3 are optional; a missing field, '-' or '' is stored as None.
    Only the first until-field (asFields[3]) is kept.
    """

    def __init__(self, asFields):
        def optionalField(iField):
            """ Returns asFields[iField] or None if absent / placeholder. """
            if iField < len(asFields) and asFields[iField] not in ('-', ''):
                return asFields[iField]
            return None

        self.sOffset = asFields[0]          # GMT offset expression
        self.sRules  = optionalField(1)
        self.sFormat = optionalField(2)
        self.sUntil  = optionalField(3)
57
class TzZoneEntry(TzWinZoneEntry):
    """
    A tzdata 'Zone' entry together with its Windows mapping attributes.
    """

    def __init__(self, sName):
        super(TzZoneEntry, self).__init__()
        self.aOffsets   = []        # type: list(TzZoneOffset)
        self.sTerritory = 'ZZ'      # Placeholder until a territory is assigned.
        self.sName      = sName
64
class TzZoneRule(object):
    """
    One tzdata 'Rule' line: NAME FROM TO TYPE IN ON AT SAVE LETTER/S.

    The name is stored verbatim.  Every other field is stored as None when
    the input is the placeholder '-' or an empty string.
    """

    def __init__(self, sName, sFrom, sTo, sType, sIn, sOn, sAt, sSave, sLetter):
        self.sName   = sName
        self.sFrom   = self._normalize(sFrom)
        # Each field is checked against its own value (sTo used to be
        # filtered on sFrom, so a '-' in TO was kept and a valid TO was lost
        # whenever FROM was a placeholder).
        self.sTo     = self._normalize(sTo)
        self.sType   = self._normalize(sType)
        self.sIn     = self._normalize(sIn)
        # The ON field used to be accepted but silently dropped.
        self.sOn     = self._normalize(sOn)
        self.sAt     = self._normalize(sAt)
        self.sSave   = self._normalize(sSave)
        self.sLetter = self._normalize(sLetter)

    @staticmethod
    def _normalize(sValue):
        """ Maps the '-' and '' placeholders to None, returns anything else as-is. """
        if sValue in ('-', ''):
            return None
        return sValue
75
def info(sMsg):
    """
    Writes sMsg to stderr as a single line prefixed with 'info: '.
    """
    sys.stderr.write('info: ' + sMsg + '\n')
81
def warning(sMsg):
    """
    Writes sMsg to stderr as a single line prefixed with 'warning: '.
    """
    sys.stderr.write('warning: ' + sMsg + '\n')
87
def error(sMsg):
    """
    Writes sMsg to stderr as a single line prefixed with 'error: '.
    """
    sys.stderr.write('error: ' + sMsg + '\n')
93
def readTzDataFile(sFile):
    """
    Reads the given data file into memory, stripping comments.

    Returns a list with one string per input line.  Everything from the
    first '#' on a line is dropped, as is trailing whitespace (including the
    line terminator).  Leading whitespace is preserved.  Lines that become
    empty are kept so that list indexes still match file line numbers.

    Exceptions raised by open() and the read propagate to the caller.
    """
    # The with-statement makes sure the file is closed even if reading fails.
    with open(sFile, 'r') as oInFile:
        return [sLine.partition('#')[0].rstrip() for sLine in oInFile]
108
109#
110# tzdata structures.
111#
g_dZones = dict()   # Zone entries, added while parsing 'Zone' lines.
g_dRules = dict()   # Rule entries, added while parsing 'Rule' lines.
g_dLinks = dict()   # Link entries, added while parsing 'Link' lines.
115
def readTzData(sTzDataDir):
    """
    Reads in the bits we want from tz data.  Assumes 2017b edition.

    Fills the module level g_dZones, g_dRules and g_dLinks dictionaries from
    the tzdata source files in sTzDataDir and then assigns the territory
    codes found in 'zone.tab' to the zones.

    Returns 0 on success.  On a malformed line the problem is reported via
    error() + info() and 1 is returned.  Failures to open or read a file are
    not caught here.
    """

    #
    # Parse the tzdata files.
    #
    for sFile in [ 'africa', 'antarctica', 'asia', 'australasia', 'europe', 'northamerica', 'southamerica',
                   'pacificnew', 'etcetera', 'backward', 'systemv', 'factory', #'backzone'
                   ]:
        sIn   = 'none'      # What kind of entry a continuation line would belong to.
        oZone = None        # The zone continuation lines are appended to.
        asLines = readTzDataFile(os.path.join(sTzDataDir, sFile))
        for iLine, sLine in enumerate(asLines):
            if not sLine.strip():
                continue    # Blank or comment-only line.
            asFields = sLine.split()
            try:
                if sLine.startswith('Zone'):    # 'Zone' NAME STDOFF RULES FORMAT [UNTIL]
                    sIn = 'Zone'
                    oZone = TzZoneEntry(asFields[1])
                    if oZone.sName in g_dZones:
                        raise Exception('duplicate: %s' % (oZone.sName,))
                    g_dZones[oZone.sName] = oZone
                    oZone.aOffsets.append(TzZoneOffset(asFields[2:]))
                elif sLine.startswith('Rule'):  # 'Rule' NAME FROM TO TYPE IN ON AT SAVE LETTER/S
                    oRule = TzZoneRule(asFields[1], asFields[2], asFields[3], asFields[4], asFields[5],
                                       asFields[6], asFields[7], asFields[8], asFields[9])
                    # Group the rule lines by rule name.  (The dictionary used to
                    # be keyed by the rule object itself, so nothing was grouped.)
                    g_dRules.setdefault(oRule.sName, []).append(oRule)
                elif sLine.startswith('Link'):  # 'Link' TARGET LINK-NAME
                    if len(asFields) != 3:
                        raise Exception("malformed link: len(asFields) = %d" % (len(asFields)))
                    oLink = TzLinkEntry(asFields[2].strip(), asFields[1].strip())
                    if oLink.sLinkNm not in g_dLinks:
                        g_dLinks[oLink.sLinkNm] = oLink
                    elif g_dLinks[oLink.sLinkNm].sTarget != oLink.sTarget:
                        warning('duplicate link for %s: new target %s, previous %s'
                                % (oLink.sLinkNm, oLink.sTarget, g_dLinks[oLink.sLinkNm].sTarget,))
                elif sIn == 'Zone':
                    # Zone continuation line: STDOFF RULES FORMAT [UNTIL].  split()
                    # already dropped the leading whitespace, so the offset fields
                    # start at index 0.  (This used to append a bogus TzZoneEntry
                    # built from asFields[3:].)
                    oZone.aOffsets.append(TzZoneOffset(asFields))
                else:
                    raise Exception('what is this?')
            except Exception as oXcpt:
                error("line %u in %s: '%s'" % (iLine + 1, sFile, type(oXcpt) if not str(oXcpt) else str(oXcpt),))
                info("'%s'" % (sLine,))
                return 1

    #
    # Process the country <-> zone mapping file.
    #
    asLines = readTzDataFile(os.path.join(sTzDataDir, 'zone.tab'))
    for iLine, sLine in enumerate(asLines):
        if not sLine or sLine[0] == ' ':
            continue
        asFields = sLine.split('\t')
        try:
            sTerritory = asFields[0]
            if len(sTerritory) != 2:
                raise Exception('malformed country: %s' % (sTerritory,))
            sZone = asFields[2]
            oZone = g_dZones.get(sZone)
            if oZone:
                # 'ZZ' is the not-yet-assigned placeholder set by TzZoneEntry.
                if oZone.sTerritory and oZone.sTerritory != 'ZZ':
                    raise Exception('zone %s already have country %s associated with it (setting %s)'
                                    % (sZone, oZone.sTerritory, sTerritory))
                oZone.sTerritory = sTerritory
            elif g_dLinks.get(sZone):
                pass    # ignore country<->link associations for now.
            else:
                raise Exception('country zone not found: %s' % (sZone,))
        except Exception as oXcpt:
            error("line %u in %s: '%s'" % (iLine + 1, 'zone.tab', type(oXcpt) if not str(oXcpt) else str(oXcpt),))
            info("'%s'" % (sLine,))
            return 1
    return 0
198
199
def readWindowsToTzMap(sMapXml):
    """
    Reads the 'common/supplemental/windowsZones.xml' file from http://cldr.unicode.org/.

    For every <mapZone> element each unix zone name listed in its 'type'
    attribute is looked up among the zones first and the links second.  The
    entry found gets the windows name ('other') and territory assigned, unless
    it already has a different mapping, in which case a warning is issued and
    the existing mapping is kept.  Territory '001' marks the entry as golden.

    Always returns 0.
    """
    oRoot = ElementTree.parse(sMapXml).getroot()
    oMapTimezones = oRoot.find('windowsZones').find('mapTimezones')
    # <mapZone other="Line Islands Standard Time" territory="001" type="Pacific/Kiritimati"/>
    for oMapZone in oMapTimezones.findall('mapZone'):
        sTerritory = oMapZone.attrib['territory']
        sWinZone   = oMapZone.attrib['other']
        for sZone in oMapZone.attrib['type'].split():
            # Zones take precedence over links of the same name.
            oEntry  = g_dZones.get(sZone)
            fIsLink = False
            if not oEntry:
                oEntry  = g_dLinks.get(sZone)
                fIsLink = True
            if not oEntry:
                warning('could not find zone "%s" (for mapping win zone "%s" to) - got the right data sets?'
                        % (sZone, sWinZone))
                continue

            # An existing mapping may only be replaced when it is the '001'
            # one for the very same windows zone name.
            fReplaceable = oEntry.sWinTerritory == '001' and oEntry.sWinName == sWinZone
            if oEntry.sWinName is not None and not fReplaceable:
                if fIsLink:
                    warning('zone-link "%s" have more than one windows mapping: %s (%s) and now %s (%s)'
                            % (sZone, oEntry.sWinName, oEntry.sWinTerritory, sWinZone, sTerritory))
                else:
                    warning('zone "%s" have more than one windows mapping: %s (%s) and now %s (%s)'
                            % (sZone, oEntry.sWinName, oEntry.sWinTerritory, sWinZone, sTerritory))
                continue

            oEntry.sWinName      = sWinZone
            oEntry.sWinTerritory = sTerritory
            if sTerritory == '001':
                oEntry.fWinGolden = True
    return 0
237
238
def readWindowsIndexes(sFile):
    """
    Reads the windows time zone index from the table in the given file and sets idxWin.

    Assumes format: index{tab}name{tab}(GMT{offset}){space}{cities}

    For instance: https://support.microsoft.com/en-gb/help/973627/microsoft-time-zone-index-values

    The first line must start with 'Index'; if it also contains 'hex' the
    index column is parsed as hexadecimal, otherwise as decimal.  Each
    following line updates idxWin of every zone in g_dZones whose windows
    name matches (case insensitively, after some name adjustments).

    Returns 0 on success, 1 (after reporting via error()) if the header is
    missing or a line cannot be parsed.
    """
    # Read the file.  The with-statement closes it even if reading fails.
    with open(sFile, "r") as oInFile:
        asLines = oInFile.readlines()

    # Check the header.  An empty file has no header either.
    if not asLines or not asLines[0].startswith('Index'):
        error('expected first line of "%s" to start with "Index"' % (sFile,))
        return 1
    fHexIndex = asLines[0].find('hex') > 0

    # Windows name prefixes that are spelled differently in the index table
    # than in the mapping data: (prefix in table, replacement).
    atPrefixFixes = (
        ('a.u.s.',        'aus'),
        ('u.s. ',         'us '),
        ('s.a. ',         'sa '),
        ('s.e. ',         'se '),
        ('pacific s.a. ', 'pacific sa '),
    )

    for iLine in range(1, len(asLines)):
        # Parse.
        asFields = asLines[iLine].split('\t')
        try:
            idxWin     = int(asFields[0].strip(), 16 if fHexIndex else 10)
            sWinName   = asFields[1].strip()
            sLocations = ' '.join(asFields[2].split())
            if sWinName.find('(GMT') >= 0:
                raise Exception("oops #1")
            if not sLocations.startswith('(GMT'):
                raise Exception("oops #2")
            # Drop the leading '(GMT...)' part, leaving just the city list.
            sLocations = sLocations[sLocations.find(')') + 1 : ].strip()
        except Exception as oXcpt:
            error("line %u in %s: '%s'" % (iLine + 1, sFile, type(oXcpt) if not str(oXcpt) else str(oXcpt),))
            info("'%s'" % (asLines[iLine],))
            return 1

        # Some name adjustments.
        sWinName = sWinName.lower()
        for sPrefix, sReplacement in atPrefixFixes:
            if sWinName.startswith(sPrefix):
                sWinName = sReplacement + sWinName[len(sPrefix):]
                break

        # Update zone entries with matching windows names.
        # NOTE(review): only g_dZones is updated here, entries in g_dLinks keep
        # idxWin = 0 even when they have a windows name - confirm that is intended.
        cUpdates = 0
        for oZone in g_dZones.values():
            if oZone.sWinName and oZone.sWinName.lower() == sWinName:
                oZone.idxWin = idxWin
                cUpdates += 1
        if cUpdates == 0:
            warning('No matching zone found for index zone "%s" (%#x, %s)' % (sWinName, idxWin, sLocations))

    return 0
301
def getPadding(sField, cchWidth):
    """
    Returns the spaces needed to pad sField out to cchWidth characters, or
    an empty string if the field already is that wide or wider.
    """
    cchMissing = cchWidth - len(sField)
    return ' ' * cchMissing if cchMissing > 0 else ''
307
def formatFields(sName, oZone, oWinZone):
    """
    Formats the table fields.

    sName is the unix zone (or link) name, oZone supplies the territory and
    oWinZone supplies the windows name, territory, index and golden flag.

    Returns a list of eight strings, one C initializer per RTTIMEZONEINFO
    member, in this order:
        const char *pszUnixName;
        const char *pszWindowsName;
        uint8_t     cchUnixName;
        uint8_t     cchWindowsName;
        char        szCountry[3];
        char        szWindowsCountry[3];
        uint32_t    idxWindows;
        uint32_t    uReserved;      (receives the flags expression)
    """
    sWinName      = oWinZone.sWinName
    sWinTerritory = oWinZone.sWinTerritory

    # The flags are OR'ed together, or '0' when none apply.
    asFlags = ['RTTIMEZONEINFO_F_GOLDEN'] if oWinZone.fWinGolden else []

    return [
        '"%s"' % sName,
        '"%s"' % sWinName if sWinName else 'NULL',
        '%u' % (len(sName),),
        '%u' % (len(sWinName),) if sWinName else '0',
        '"%s"' % (oZone.sTerritory,),
        '"%s"' % (sWinTerritory,) if sWinTerritory else '""',
        '%#010x' % (oWinZone.idxWin,),
        ' | '.join(asFlags) if asFlags else '0',
    ]
348
def produceCode(oDst):
    """
    Produces the tables.

    Writes two C tables to oDst (anything with a write() method):
      - g_aTimeZones: one RTTIMEZONEINFO initializer per zone and per link,
        sorted by unix name.
      - g_aidxWinTimeZones: indexes into g_aTimeZones for all entries having
        a windows name, sorted by windows name, golden flag and territory.

    Always returns 0.  A link whose target is not in g_dZones raises KeyError.
    """

    #
    # Produce the info table.
    #

    # The straight zones first, then the links.  A link takes the territory
    # from its target zone and the windows details from the link itself.
    aasEntries = [formatFields(sZone, oZone, oZone) for sZone, oZone in g_dZones.items()]
    for sLinkNm, oLink in g_dLinks.items():
        aasEntries.append(formatFields(sLinkNm, g_dZones[oLink.sTarget], oLink))

    # Figure field lengths (the initial values are minimum widths).
    acchFields = [ 2, 2, 2, 2, 4, 4, 10, 1 ]
    for asFields in aasEntries:
        assert len(asFields) == len(acchFields)
        for iField, sField in enumerate(asFields):
            acchFields[iField] = max(acchFields[iField], len(sField))

    # Sort the data on zone name.
    aasEntries.sort()

    # Do the formatting.
    oDst.write('/**\n'
               ' * Static time zone mapping info. Sorted by pszUnixName.\n'
               ' */\n'
               'static const RTTIMEZONEINFO g_aTimeZones[] =\n'
               '{\n')
    for iEntry, asFields in enumerate(aasEntries):
        sColumns = ''.join([sField + ', ' + getPadding(sField, acchFields[iField])
                            for iField, sField in enumerate(asFields)])
        oDst.write(' { ' + sColumns + (' }, /* %#05x */\n' % (iEntry,)))
    oDst.write('};\n'
               '\n')

    #
    # Now produce a lookup table for windows time zone names, with indexes into
    # the g_aTimeZone table.
    #
    aasLookup = []
    for iEntry, asFields in enumerate(aasEntries):
        if asFields[1] != 'NULL':
            aasLookup.append([ asFields[1],   # sWinName
                               # Golden entries (-1) sort before the others (1).
                               -1 if asFields[7].find('RTTIMEZONEINFO_F_GOLDEN') >= 0 else 1,
                               asFields[5],   # sWinTerritory
                               iEntry,
                               asFields[0]])  # sZone
    aasLookup.sort()

    oDst.write('/**\n'
               ' * Windows time zone lookup table. Sorted by name, golden flag and territory.\n'
               ' */\n'
               'static const uint16_t g_aidxWinTimeZones[] = \n'
               '{\n')
    for sWinName, iGolden, sWinTerritory, iEntry, sZone in aasLookup:
        # All three strings are C literals here, [1:-1] strips the quotes.
        sTerritory = sWinTerritory[1:-1]
        sLine  = ' %#05x, /* %s' % (iEntry, sWinName[1:-1])
        sLine += getPadding(sWinName, acchFields[1])
        sLine += ' / %s%s' % (sTerritory, '+' if iGolden < 0 else ' ')
        # Pad two letter territories to the width of '001'.  This has to look
        # at the unquoted value; checking the quoted field for length 2 never
        # matched a two letter territory.
        if len(sTerritory) == 2:
            sLine += ' '
        sLine += ' ==> %s */\n' % (sZone[1:-1],)
        oDst.write(sLine)

    oDst.write('};\n'
               '\n')

    return 0
430
431
def main(asArgs):
    """
    C-like main function.

    asArgs is the argv style argument list:
        asArgs[1]: the tzdata directory,
        asArgs[2]: the windows-to-tz mapping XML file,
        asArgs[3]: the windows time zone index table file.

    Reads the three inputs in that order and, if all succeed, writes the C
    tables to stdout.  Returns 0 on success, otherwise the non-zero status
    of the first failing step (1 on bad usage).
    """
    if len(asArgs) != 4:
        # The usage text lists the arguments in the order they are consumed below.
        error("Takes exactly three arguments: <tz-data-dir> <windows-zones-xml> <ms-index-file>")
        return 1
    sTzDataDir     = asArgs[1]
    sWinToTzMap    = asArgs[2]
    sWinIndexTable = asArgs[3]

    #
    # Read in the data first, bailing out on the first failure.
    #
    iRc = readTzData(sTzDataDir)
    if iRc != 0:
        return iRc
    iRc = readWindowsToTzMap(sWinToTzMap)
    if iRc != 0:
        return iRc
    iRc = readWindowsIndexes(sWinIndexTable)
    if iRc != 0:
        return iRc

    #
    # Produce the C table.
    #
    return produceCode(sys.stdout)
457
# Script entry point: the return value of main() becomes the exit status.
if __name__ == '__main__':
    iExitCode = main(sys.argv)
    sys.exit(iExitCode)
460
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette