/usr/lib/python2.7/dist-packages/guessit/date.py is in python-guessit 0.11.0-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
import datetime
import re
from dateutil import parser
# Characters accepted as a separator between the components of a date.
_dsep = r'[-/ \.]'
# Same separators plus 'x'; used below only on the side of a four-digit
# component.
_dsep_bis = r'[-/ \.x]'

# Candidate date patterns, all compiled case-insensitively. Every pattern
# expects one delimiting character on each side of the date, which is why
# callers wrap the searched string in sentinels.
#
# Raw string literals are used throughout so that ``\d`` reaches the regex
# engine verbatim instead of relying on Python leaving an unknown string
# escape untouched (which triggers a warning on recent interpreters).
date_regexps = [
    # eight consecutive digits between two separators
    re.compile(r'%s(\d{8})%s' % (_dsep, _dsep), re.IGNORECASE),
    # six consecutive digits between two separators
    re.compile(r'%s(\d{6})%s' % (_dsep, _dsep), re.IGNORECASE),
    # two digits, then two groups of one or two digits
    re.compile(r'[^\d](\d{2})%s(\d{1,2})%s(\d{1,2})[^\d]' % (_dsep, _dsep), re.IGNORECASE),
    # two groups of one or two digits, then two digits
    re.compile(r'[^\d](\d{1,2})%s(\d{1,2})%s(\d{2})[^\d]' % (_dsep, _dsep), re.IGNORECASE),
    # four digits first, then two groups of one or two digits
    re.compile(r'[^\d](\d{4})%s(\d{1,2})%s(\d{1,2})[^\d]' % (_dsep_bis, _dsep), re.IGNORECASE),
    # two groups of one or two digits, then four digits
    re.compile(r'[^\d](\d{1,2})%s(\d{1,2})%s(\d{4})[^\d]' % (_dsep, _dsep_bis), re.IGNORECASE),
    # one or two digits with optional ordinal suffix, a 3-10 letter word,
    # then four digits (captured as a single group)
    re.compile(r'[^\d](\d{1,2}(?:st|nd|rd|th)?%s(?:[a-z]{3,10})%s\d{4})[^\d]' % (_dsep, _dsep), re.IGNORECASE),
]
def valid_year(year, today=None):
    """Tell whether ``year`` lies in the accepted range of years.

    A year is accepted when it is strictly greater than 1920 and strictly
    less than the reference year plus five.

    :param year: the number to check
    :param today: reference date providing a ``year`` attribute; when it is
        not supplied (or falsy) the current date is used
    :return: True if the year is within the range, False otherwise
    """
    reference = today or datetime.date.today()
    return 1920 < year and year < reference.year + 5
def search_year(string):
    """Looks for year patterns, and if found return the year and group span.

    Assumes there are sentinels at the beginning and end of the string that
    always allow matching a non-digit delimiting the date.

    Note this only looks for valid production years, that is between 1920
    and now + 5 years, so for instance 2000 would be returned as a valid
    year but 1492 would not.

    Every run of exactly four digits is considered, in order, and the first
    one accepted by ``valid_year`` is returned; an implausible four-digit
    number earlier in the string no longer hides a valid year that follows.

    :param string: text to search, delimited by a sentinel on each side
    :return: ``(year, (start, end))`` for the first valid year, where the
        span covers the four digits only, or ``(None, None)`` if none found

    >>> search_year(' in the year 2000... ')
    (2000, (13, 17))

    >>> search_year(' they arrived in 1492. ')
    (None, None)

    >>> search_year(' 1492 then 2000 ')
    (2000, (11, 15))
    """
    # The delimiters are checked with lookarounds instead of being consumed,
    # so that two candidates separated by a single character (' 1492 2000 ')
    # are both seen. A non-digit character is still required on each side.
    for candidate in re.finditer(r'(?<=[^0-9])([0-9]{4})(?=[^0-9])', string):
        year = int(candidate.group(1))
        if valid_year(year):
            return year, candidate.span(1)

    return None, None
def _is_int(string):
    """
    Tell whether the given string can be converted with ``int()``.

    Only ``ValueError`` is treated as "not an integer"; any other exception
    raised by the conversion propagates to the caller.

    :param string: value to test
    :type string: str
    :return: True if ``int(string)`` succeeds, False if it raises ValueError
    :rtype: bool
    """
    try:
        int(string)
    except ValueError:
        return False
    else:
        return True
def _guess_day_first_parameter(groups):
    """
    Heuristically pick a value for day_first when the caller gave none.

    It helps to solve issues with python dateutils 2.5.3 parser changes.

    The first and last matched groups are inspected, in this order:

    1. first group starts with a valid four-digit year -> False
    2. last group ends with a valid four-digit year -> True
    3. first group starts with a two-digit number above 31 -> False
    4. last group ends with a two-digit number above 31 -> True

    :param groups: match groups found for the date
    :type groups: sequence of str
    :return: guessed day_first value, or None when no rule applies
    :rtype: bool or None
    """
    head = groups[0]
    head_numeric = _is_int(head)
    # A leading long year means the day cannot come first.
    if head_numeric and valid_year(int(head[:4])):
        return False

    tail = groups[-1]
    tail_numeric = _is_int(tail)
    # A trailing long year means the day comes first.
    if tail_numeric and valid_year(int(tail[-4:])):
        return True

    # A leading number above 31 cannot be a day: treat it as a short year.
    if head_numeric and int(head[:2]) > 31:
        return False

    # A trailing number above 31 cannot be a day: treat it as a short year.
    if tail_numeric and int(tail[-2:]) > 31:
        return True

    # Undecided: the caller will have to try both possibilities.
    return None
def search_date(string, year_first=None, day_first=None):
    """Looks for date patterns, and if found return the date and group span.

    Assumes there are sentinels at the beginning and end of the string that
    always allow matching a non-digit delimiting the date.

    Year can be defined on two digit only. It will return the nearest possible
    date from today.

    :param string: text to search, delimited by a sentinel on each side
    :param year_first: passed to the dateutil parser as ``yearfirst``; when
        None, both False and True are tried
    :param day_first: passed to the dateutil parser as ``dayfirst``; when
        None it is forced to False if ``year_first`` is truthy, otherwise
        guessed from the matched groups, and if still undecided both True
        and False are tried
    :return: ``(date, (start, end))`` where the span excludes the two
        delimiting characters, or ``(None, None)`` if no plausible date is
        found

    >>> search_date(' This happened on 2002-04-22. ')
    (datetime.date(2002, 4, 22), (18, 28))

    >>> search_date(' And this on 17-06-1998. ')
    (datetime.date(1998, 6, 17), (13, 23))

    >>> search_date(' no date in here ')
    (None, None)
    """
    start, end = None, None
    match = None
    groups = None
    for date_re in date_regexps:
        found = date_re.search(string)
        # A candidate replaces the retained one when its full span (the
        # delimiters included) is longer than the retained match text.
        if found and (match is None or found.end() - found.start() > len(match)):
            start, end = found.start(), found.end()
            if date_re.groups:
                groups = found.groups()
                match = '-'.join(groups)
            else:
                match = found.group()
                groups = [match]

    if match is None:
        return None, None

    today = datetime.date.today()

    if year_first and day_first is None:
        day_first = False

    if day_first is None:
        day_first = _guess_day_first_parameter(groups)

    # If day_first/year_first is undefined, parse is made using both possible values.
    yearfirst_opts = [False, True]
    if year_first is not None:
        yearfirst_opts = [year_first]

    dayfirst_opts = [True, False]
    if day_first is not None:
        dayfirst_opts = [day_first]

    kwargs_list = ({'dayfirst': d, 'yearfirst': y} for d in dayfirst_opts for y in yearfirst_opts)
    for kwargs in kwargs_list:
        try:
            date = parser.parse(match, **kwargs)
        except (ValueError, TypeError, OverflowError):
            # ValueError/TypeError: see https://bugs.launchpad.net/dateutil/+bug/1247643
            # OverflowError: documented by dateutil for values too large to
            # convert. In every case this option set yields no date; try the
            # next one instead of aborting the search.
            continue

        # check date plausibility
        if date and valid_year(date.year, today=today):
            return date.date(), (start + 1, end - 1)  # compensate for sentinels

    return None, None
|