/usr/share/pyshared/CedarBackup2/tools/amazons3.py is in cedar-backup2 2.26.5-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 
1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 | # -*- coding: iso-8859-1 -*-
# vim: set ft=python ts=3 sw=3 expandtab:
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# C E D A R
# S O L U T I O N S "Software done right."
# S O F T W A R E
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# Copyright (c) 2014 Kenneth J. Pronovici.
# All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License,
# Version 2, as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Copies of the GNU General Public License are available from
# the Free Software Foundation website, http://www.gnu.org/.
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# Author : Kenneth J. Pronovici <pronovic@ieee.org>
# Language : Python 2 (>= 2.7)
# Project : Cedar Backup, release 2
# Purpose : Cedar Backup tool to synchronize an Amazon S3 bucket.
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
########################################################################
# Notes
########################################################################
"""
Synchronizes a local directory with an Amazon S3 bucket.
No configuration is required; all necessary information is taken from the
command-line. The only thing configuration would help with is the path
resolver interface, and it doesn't seem worth it to require configuration just
to get that.
@author: Kenneth J. Pronovici <pronovic@ieee.org>
"""
########################################################################
# Imported modules and constants
########################################################################
# System modules
import sys
import os
import logging
import getopt
import json
import warnings
import chardet
# Cedar Backup modules
from CedarBackup2.release import AUTHOR, EMAIL, VERSION, DATE, COPYRIGHT
from CedarBackup2.filesystem import FilesystemList
from CedarBackup2.cli import setupLogging, DEFAULT_LOGFILE, DEFAULT_OWNERSHIP, DEFAULT_MODE
from CedarBackup2.util import Diagnostics, splitCommandLine, encodePath
from CedarBackup2.util import executeCommand
########################################################################
# Module-wide constants and variables
########################################################################
# Logger used by the functions in this module (see cli()).
logger = logging.getLogger("CedarBackup2.log.tools.amazons3")
# Command list naming the "aws" executable.
# NOTE(review): not referenced in the visible part of this file; presumably the
# base command for the sync/verify steps further down -- confirm.
AWS_COMMAND = [ "aws" ]
# getopt specifications handed to getopt.getopt() by Options._parseArgumentList().
# A trailing ":" (short form) or "=" (long form) marks a switch that takes a value.
SHORT_SWITCHES = "hVbql:o:m:OdsDvw"
LONG_SWITCHES = [ 'help', 'version', 'verbose', 'quiet',
'logfile=', 'owner=', 'mode=',
'output', 'debug', 'stack', 'diagnostics',
'verifyOnly', 'ignoreWarnings', ]
#######################################################################
# Options class
#######################################################################
class Options(object):

   ######################
   # Class documentation
   ######################

   """
   Class representing command-line options for the cback-amazons3-sync script.

   The C{Options} class is a Python object representation of the command-line
   options of the cback-amazons3-sync script.

   The object representation is two-way: a command line string or a list of
   command line arguments can be used to create an C{Options} object, and then
   changes to the object can be propagated back to a list of command-line
   arguments or to a command-line string.  An C{Options} object can even be
   created from scratch programmatically (if you have a need for that).

   There are two main levels of validation in the C{Options} class.  The first
   is field-level validation.  Field-level validation comes into play when a
   given field in an object is assigned to or updated.  We use Python's
   C{property} functionality to enforce specific validations on field values.
   You should expect to catch a C{ValueError} exception when making
   assignments to fields if you are programmatically filling an object.

   The second level of validation is post-completion validation.  Certain
   validations don't make sense until an object representation of options is
   fully "complete".  We don't want these validations to apply all of the time,
   because it would make building up a valid object from scratch a real pain.

   All of these post-completion validations are encapsulated in the
   L{Options.validate} method.  This method can be called at any time by a
   client, and will always be called immediately after creating a C{Options}
   object from a command line and before exporting a C{Options} object back to
   a command line.  This way, we get acceptable ease-of-use but we also don't
   accept or emit invalid command lines.

   @sort: __init__, __repr__, __str__, __cmp__
   """

   # Public fields examined by __cmp__, in order of precedence.
   _COMPARE_FIELDS = ( "help", "version", "verbose", "quiet",
                       "logfile", "owner", "mode",
                       "output", "debug", "stacktrace", "diagnostics",
                       "verifyOnly", "ignoreWarnings",
                       "sourceDir", "s3BucketUrl", )

   ##############
   # Constructor
   ##############

   def __init__(self, argumentList=None, argumentString=None, validate=True):
      """
      Initializes an options object.

      If you initialize the object without passing either C{argumentList} or
      C{argumentString}, the object will be empty and will be invalid until it
      is filled in properly.  In that case nothing is parsed and
      L{Options.validate} is not called, whatever the value of C{validate}.

      No reference to the original arguments is saved off by this class.

      The argument list is assumed to be a list of arguments, not including the
      name of the command, something like C{sys.argv[1:]}.  If you pass
      C{sys.argv} instead, things are not going to work.

      The argument string will be parsed into an argument list by the
      L{util.splitCommandLine} function.  There is an assumption that the
      resulting list will be equivalent to C{sys.argv[1:]}, just like
      C{argumentList}.

      Unless the C{validate} argument is C{False}, the L{Options.validate}
      method will be called after successfully parsing any passed-in command
      line.  Keep in mind that even if C{validate} is C{False}, it might not be
      possible to parse the passed-in command line, so an exception might still
      be raised.

      @note: The command line format is specified by the L{_usage} function.

      @note: It is strongly suggested that the C{validate} option always be set
      to C{True} (the default) unless there is a specific need to read in
      invalid command line arguments.

      @param argumentList: Command line for a program.
      @type argumentList: List of arguments, i.e. C{sys.argv[1:]}

      @param argumentString: Command line for a program.
      @type argumentString: String, i.e. "--verbose /source/dir s3://bucket"

      @param validate: Validate the command line after parsing it.
      @type validate: Boolean true/false.

      @raise getopt.GetoptError: If the command-line arguments could not be parsed.
      @raise ValueError: If the command-line arguments are invalid, or if both
      C{argumentList} and C{argumentString} are passed.
      """
      self._help = False
      self._version = False
      self._verbose = False
      self._quiet = False
      self._logfile = None
      self._owner = None
      self._mode = None
      self._output = False
      self._debug = False
      self._stacktrace = False
      self._diagnostics = False
      self._verifyOnly = False
      self._ignoreWarnings = False
      self._sourceDir = None
      self._s3BucketUrl = None
      if argumentList is not None and argumentString is not None:
         raise ValueError("Use either argumentList or argumentString, but not both.")
      if argumentString is not None:
         argumentList = splitCommandLine(argumentString)
      if argumentList is not None:
         self._parseArgumentList(argumentList)
         if validate:
            self.validate()

   #########################
   # String representations
   #########################

   def __repr__(self):
      """
      Official string representation for class instance.

      This is the (unvalidated) command-line string built by
      L{buildArgumentString}.
      """
      return self.buildArgumentString(validate=False)

   def __str__(self):
      """
      Informal string representation for class instance (same as C{repr}).
      """
      return self.__repr__()

   #############################
   # Standard comparison method
   #############################

   def __cmp__(self, other):
      """
      Definition of equals operator for this class.

      Fields are compared one at a time, in the order given by
      C{_COMPARE_FIELDS}; the first field that differs decides the result.

      @param other: Other object to compare to.
      @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
      """
      if other is None:
         return 1
      for name in self._COMPARE_FIELDS:
         mine = getattr(self, name)
         theirs = getattr(other, name)
         if mine != theirs:
            if mine < theirs:
               return -1
            else:
               return 1
      return 0

   #############
   # Properties
   #############

   def _setHelp(self, value):
      """
      Property target used to set the help flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._help = bool(value)

   def _getHelp(self):
      """
      Property target used to get the help flag.
      """
      return self._help

   def _setVersion(self, value):
      """
      Property target used to set the version flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._version = bool(value)

   def _getVersion(self):
      """
      Property target used to get the version flag.
      """
      return self._version

   def _setVerbose(self, value):
      """
      Property target used to set the verbose flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._verbose = bool(value)

   def _getVerbose(self):
      """
      Property target used to get the verbose flag.
      """
      return self._verbose

   def _setQuiet(self, value):
      """
      Property target used to set the quiet flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._quiet = bool(value)

   def _getQuiet(self):
      """
      Property target used to get the quiet flag.
      """
      return self._quiet

   def _setLogfile(self, value):
      """
      Property target used to set the logfile parameter.
      @raise ValueError: If the value is empty or cannot be encoded properly.
      """
      if value is not None:
         if len(value) < 1:
            raise ValueError("The logfile parameter must be a non-empty string.")
      # encodePath() is applied even when value is None, as in the original code
      # (presumably it passes None through unchanged -- confirm against util).
      self._logfile = encodePath(value)

   def _getLogfile(self):
      """
      Property target used to get the logfile parameter.
      """
      return self._logfile

   def _setOwner(self, value):
      """
      Property target used to set the owner parameter.
      If not C{None}, the owner must be a C{(user,group)} tuple or list.
      Strings (and inherited children of strings) are explicitly disallowed.
      The value will be normalized to a tuple.
      @raise ValueError: If the value is not valid.
      """
      if value is None:
         self._owner = None
      else:
         # A 2-character string would otherwise pass the length check below.
         if isinstance(value, str):
            raise ValueError("Must specify user and group tuple for owner parameter.")
         if len(value) != 2:
            raise ValueError("Must specify user and group tuple for owner parameter.")
         if len(value[0]) < 1 or len(value[1]) < 1:
            raise ValueError("User and group tuple values must be non-empty strings.")
         self._owner = (value[0], value[1])

   def _getOwner(self):
      """
      Property target used to get the owner parameter.
      The parameter is a tuple of C{(user, group)}.
      """
      return self._owner

   def _setMode(self, value):
      """
      Property target used to set the mode parameter.

      A string is interpreted as octal digits (C{"644"} becomes C{0644});
      any other value is converted with C{int()}.

      @raise ValueError: If the value is not a valid non-negative octal integer.
      """
      if value is None:
         self._mode = None
      else:
         try:
            if isinstance(value, str):
               value = int(value, 8)
            else:
               value = int(value)
         except (TypeError, ValueError):
            # int() raises ValueError (not TypeError) for a malformed string such
            # as "9" or "abc"; report both failures with the same message.
            raise ValueError("Mode must be an octal integer >= 0, i.e. 644.")
         if value < 0:
            raise ValueError("Mode must be an octal integer >= 0. i.e. 644.")
         self._mode = value

   def _getMode(self):
      """
      Property target used to get the mode parameter.
      """
      return self._mode

   def _setOutput(self, value):
      """
      Property target used to set the output flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._output = bool(value)

   def _getOutput(self):
      """
      Property target used to get the output flag.
      """
      return self._output

   def _setDebug(self, value):
      """
      Property target used to set the debug flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._debug = bool(value)

   def _getDebug(self):
      """
      Property target used to get the debug flag.
      """
      return self._debug

   def _setStacktrace(self, value):
      """
      Property target used to set the stacktrace flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._stacktrace = bool(value)

   def _getStacktrace(self):
      """
      Property target used to get the stacktrace flag.
      """
      return self._stacktrace

   def _setDiagnostics(self, value):
      """
      Property target used to set the diagnostics flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._diagnostics = bool(value)

   def _getDiagnostics(self):
      """
      Property target used to get the diagnostics flag.
      """
      return self._diagnostics

   def _setVerifyOnly(self, value):
      """
      Property target used to set the verifyOnly flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._verifyOnly = bool(value)

   def _getVerifyOnly(self):
      """
      Property target used to get the verifyOnly flag.
      """
      return self._verifyOnly

   def _setIgnoreWarnings(self, value):
      """
      Property target used to set the ignoreWarnings flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._ignoreWarnings = bool(value)

   def _getIgnoreWarnings(self):
      """
      Property target used to get the ignoreWarnings flag.
      """
      return self._ignoreWarnings

   def _setSourceDir(self, value):
      """
      Property target used to set the sourceDir parameter.
      @raise ValueError: If the value is an empty string.
      """
      if value is not None:
         if len(value) < 1:
            raise ValueError("The sourceDir parameter must be a non-empty string.")
      self._sourceDir = value

   def _getSourceDir(self):
      """
      Property target used to get the sourceDir parameter.
      """
      return self._sourceDir

   def _setS3BucketUrl(self, value):
      """
      Property target used to set the s3BucketUrl parameter.
      @raise ValueError: If the value is an empty string.
      """
      if value is not None:
         if len(value) < 1:
            raise ValueError("The s3BucketUrl parameter must be a non-empty string.")
      self._s3BucketUrl = value

   def _getS3BucketUrl(self):
      """
      Property target used to get the s3BucketUrl parameter.
      """
      return self._s3BucketUrl

   help = property(_getHelp, _setHelp, None, "Command-line help (C{-h,--help}) flag.")
   version = property(_getVersion, _setVersion, None, "Command-line version (C{-V,--version}) flag.")
   verbose = property(_getVerbose, _setVerbose, None, "Command-line verbose (C{-b,--verbose}) flag.")
   quiet = property(_getQuiet, _setQuiet, None, "Command-line quiet (C{-q,--quiet}) flag.")
   logfile = property(_getLogfile, _setLogfile, None, "Command-line logfile (C{-l,--logfile}) parameter.")
   owner = property(_getOwner, _setOwner, None, "Command-line owner (C{-o,--owner}) parameter, as tuple C{(user,group)}.")
   mode = property(_getMode, _setMode, None, "Command-line mode (C{-m,--mode}) parameter.")
   output = property(_getOutput, _setOutput, None, "Command-line output (C{-O,--output}) flag.")
   debug = property(_getDebug, _setDebug, None, "Command-line debug (C{-d,--debug}) flag.")
   stacktrace = property(_getStacktrace, _setStacktrace, None, "Command-line stacktrace (C{-s,--stack}) flag.")
   diagnostics = property(_getDiagnostics, _setDiagnostics, None, "Command-line diagnostics (C{-D,--diagnostics}) flag.")
   verifyOnly = property(_getVerifyOnly, _setVerifyOnly, None, "Command-line verifyOnly (C{-v,--verifyOnly}) flag.")
   ignoreWarnings = property(_getIgnoreWarnings, _setIgnoreWarnings, None, "Command-line ignoreWarnings (C{-w,--ignoreWarnings}) flag.")
   sourceDir = property(_getSourceDir, _setSourceDir, None, "Command-line sourceDir, source of sync.")
   s3BucketUrl = property(_getS3BucketUrl, _setS3BucketUrl, None, "Command-line s3BucketUrl, target of sync.")

   ##################
   # Utility methods
   ##################

   def validate(self):
      """
      Validates command-line options represented by the object.

      Unless C{--help}, C{--version} or C{--diagnostics} is set, both the
      source directory and the S3 bucket URL must be present.  Other
      validations (as for allowed values for particular options) are taken
      care of at assignment time by the properties functionality.

      @note: The command line format is specified by the L{_usage} function.

      @raise ValueError: If one of the validations fails.
      """
      if not self.help and not self.version and not self.diagnostics:
         if self.sourceDir is None or self.s3BucketUrl is None:
            raise ValueError("Source directory and S3 bucket URL are both required.")

   def buildArgumentList(self, validate=True):
      """
      Extracts options into a list of command line arguments.

      The original order of the various arguments (if, indeed, the object was
      initialized with a command-line) is not preserved in this generated
      argument list.  Besides that, the argument list is normalized to use the
      long option names (i.e. --version rather than -V).  The resulting list
      will be suitable for passing back to the constructor in the
      C{argumentList} parameter.  Unlike L{buildArgumentString}, string
      arguments are not quoted here, because there is no need for it.

      Unless the C{validate} parameter is C{False}, the L{Options.validate}
      method will be called against the options before extracting the command
      line.  If the options are not valid, then an argument list will not be
      extracted.

      @note: It is strongly suggested that the C{validate} option always be set
      to C{True} (the default) unless there is a specific need to extract an
      invalid command line.

      @param validate: Validate the options before extracting the command line.
      @type validate: Boolean true/false.

      @return: List representation of command-line arguments.
      @raise ValueError: If options within the object are invalid.
      """
      if validate:
         self.validate()
      argumentList = []
      if self.help:
         argumentList.append("--help")
      if self.version:
         argumentList.append("--version")
      if self.verbose:
         argumentList.append("--verbose")
      if self.quiet:
         argumentList.append("--quiet")
      if self.logfile is not None:
         argumentList.append("--logfile")
         argumentList.append(self.logfile)
      if self.owner is not None:
         argumentList.append("--owner")
         argumentList.append("%s:%s" % (self.owner[0], self.owner[1]))
      if self.mode is not None:
         argumentList.append("--mode")
         argumentList.append("%o" % self.mode)
      if self.output:
         argumentList.append("--output")
      if self.debug:
         argumentList.append("--debug")
      if self.stacktrace:
         argumentList.append("--stack")
      if self.diagnostics:
         argumentList.append("--diagnostics")
      if self.verifyOnly:
         argumentList.append("--verifyOnly")
      if self.ignoreWarnings:
         argumentList.append("--ignoreWarnings")
      # Positional arguments always come last: source first, then target.
      if self.sourceDir is not None:
         argumentList.append(self.sourceDir)
      if self.s3BucketUrl is not None:
         argumentList.append(self.s3BucketUrl)
      return argumentList

   def buildArgumentString(self, validate=True):
      """
      Extracts options into a string of command-line arguments.

      The original order of the various arguments (if, indeed, the object was
      initialized with a command-line) is not preserved in this generated
      argument string.  Besides that, the argument string is normalized to use
      the long option names (i.e. --version rather than -V) and to quote all
      string arguments with double quotes (C{"}).  Every emitted item is
      followed by a single space, so a non-empty result ends with a space.
      The resulting string will be suitable for passing back to the
      constructor in the C{argumentString} parameter.

      Unless the C{validate} parameter is C{False}, the L{Options.validate}
      method will be called against the options before extracting the command
      line.  If the options are not valid, then an argument string will not be
      extracted.

      @note: It is strongly suggested that the C{validate} option always be set
      to C{True} (the default) unless there is a specific need to extract an
      invalid command line.

      @param validate: Validate the options before extracting the command line.
      @type validate: Boolean true/false.

      @return: String representation of command-line arguments.
      @raise ValueError: If options within the object are invalid.
      """
      if validate:
         self.validate()
      pieces = []
      if self.help:
         pieces.append("--help ")
      if self.version:
         pieces.append("--version ")
      if self.verbose:
         pieces.append("--verbose ")
      if self.quiet:
         pieces.append("--quiet ")
      if self.logfile is not None:
         pieces.append("--logfile \"%s\" " % self.logfile)
      if self.owner is not None:
         pieces.append("--owner \"%s:%s\" " % (self.owner[0], self.owner[1]))
      if self.mode is not None:
         pieces.append("--mode %o " % self.mode)
      if self.output:
         pieces.append("--output ")
      if self.debug:
         pieces.append("--debug ")
      if self.stacktrace:
         pieces.append("--stack ")
      if self.diagnostics:
         pieces.append("--diagnostics ")
      if self.verifyOnly:
         pieces.append("--verifyOnly ")
      if self.ignoreWarnings:
         pieces.append("--ignoreWarnings ")
      if self.sourceDir is not None:
         pieces.append("\"%s\" " % self.sourceDir)
      if self.s3BucketUrl is not None:
         pieces.append("\"%s\" " % self.s3BucketUrl)
      return "".join(pieces)

   def _parseArgumentList(self, argumentList):
      """
      Internal method to parse a list of command-line arguments.

      Most of the validation we do here has to do with whether the arguments
      can be parsed and whether any values which exist are valid.  We don't do
      any validation as to whether required elements exist or whether elements
      exist in the proper combination (instead, that's the job of the
      L{validate} method).

      For any of the options which supply parameters, if the option is
      duplicated with long and short switches (i.e. C{-l} and a C{--logfile})
      then the long switch is used.  If the same option is duplicated with the
      same switch (long or short), then the last entry on the command line is
      used.

      @param argumentList: List of arguments to a command.
      @type argumentList: List of arguments to a command, i.e. C{sys.argv[1:]}

      @raise getopt.GetoptError: If the switches cannot be parsed.
      @raise ValueError: If a switch value is rejected by its property.
      """
      opts, remaining = getopt.getopt(argumentList, SHORT_SWITCHES, LONG_SWITCHES)
      switches = dict(opts)  # later duplicates of the same switch overwrite earlier ones
      if "-h" in switches or "--help" in switches:
         self.help = True
      if "-V" in switches or "--version" in switches:
         self.version = True
      if "-b" in switches or "--verbose" in switches:
         self.verbose = True
      if "-q" in switches or "--quiet" in switches:
         self.quiet = True
      # For valued switches the long form is assigned second so that it wins.
      if "-l" in switches:
         self.logfile = switches["-l"]
      if "--logfile" in switches:
         self.logfile = switches["--logfile"]
      if "-o" in switches:
         self.owner = switches["-o"].split(":", 1)
      if "--owner" in switches:
         self.owner = switches["--owner"].split(":", 1)
      if "-m" in switches:
         self.mode = switches["-m"]
      if "--mode" in switches:
         self.mode = switches["--mode"]
      if "-O" in switches or "--output" in switches:
         self.output = True
      if "-d" in switches or "--debug" in switches:
         self.debug = True
      if "-s" in switches or "--stack" in switches:
         self.stacktrace = True
      if "-D" in switches or "--diagnostics" in switches:
         self.diagnostics = True
      if "-v" in switches or "--verifyOnly" in switches:
         self.verifyOnly = True
      if "-w" in switches or "--ignoreWarnings" in switches:
         self.ignoreWarnings = True
      try:
         (self.sourceDir, self.s3BucketUrl) = remaining
      except ValueError:
         # Deliberately ignored: a wrong number of positional arguments (or an
         # empty one) leaves a field unset, and validate() reports that later.
         pass
#######################################################################
# Public functions
#######################################################################
#################
# cli() function
#################
def cli():
   """
   Implements the command-line interface for the C{cback-amazons3-sync} script.

   Essentially, this is the "main routine" for the cback-amazons3-sync script.  It does
   all of the argument processing for the script, and then also implements the
   tool functionality.

   This function looks pretty similar to C{CedarBackup2.cli.cli()}.  It's not
   easy to refactor this code to make it reusable and also readable, so I've
   decided to just live with the duplication.

   C{0} is returned on success, including after handling C{--help},
   C{--version} or C{--diagnostics}.  A different error code is returned for
   each type of failure:

      - C{1}: The Python interpreter version is < 2.7
      - C{2}: Error processing command-line arguments
      - C{3}: Error configuring logging
      - C{5}: Backup was interrupted with a CTRL-C or similar
      - C{6}: Error executing other parts of the script

   When the C{--stack} switch is given, exceptions raised while setting up
   logging or executing the action are not caught here and propagate to the
   caller instead of being turned into codes 3, 5 or 6.

   @note: This script uses print rather than logging to the INFO level, because
   it is interactive.  Underlying Cedar Backup functionality uses the logging
   mechanism exclusively.

   @return: Error code as described above.
   """
   try:
      if tuple(sys.version_info[:2]) < (2, 7):
         sys.stderr.write("Python 2 version 2.7 or greater required.\n")
         return 1
   except Exception:
      # sys.version_info isn't available before 2.0
      sys.stderr.write("Python 2 version 2.7 or greater required.\n")
      return 1

   try:
      options = Options(argumentList=sys.argv[1:])
   except Exception as e:
      _usage()
      sys.stderr.write(" *** Error: %s\n" % e)
      return 2

   # Informational switches short-circuit before logging is configured.
   if options.help:
      _usage()
      return 0
   if options.version:
      _version()
      return 0
   if options.diagnostics:
      _diagnostics()
      return 0

   if options.stacktrace:
      # With --stack, let any exception propagate so the traceback is shown.
      logfile = setupLogging(options)
   else:
      try:
         logfile = setupLogging(options)
      except Exception as e:
         sys.stderr.write("Error setting up logging: %s\n" % e)
         return 3

   logger.info("Cedar Backup Amazon S3 sync run started.")
   logger.info("Options were [%s]", options)
   logger.info("Logfile is [%s]", logfile)
   Diagnostics().logDiagnostics(method=logger.info)

   if options.stacktrace:
      _executeAction(options)
   else:
      try:
         _executeAction(options)
      except KeyboardInterrupt:
         logger.error("Backup interrupted.")
         logger.info("Cedar Backup Amazon S3 sync run completed with status 5.")
         return 5
      except Exception as e:
         logger.error("Error executing backup: %s", e)
         logger.info("Cedar Backup Amazon S3 sync run completed with status 6.")
         return 6

   logger.info("Cedar Backup Amazon S3 sync run completed with status 0.")
   return 0
#######################################################################
# Utility functions
#######################################################################
####################
# _usage() function
####################
def _usage(fd=sys.stderr):
    """
    Prints usage information for the cback-amazons3-sync script.
    @param fd: File-like object (anything with a C{write} method) that receives the text.
    @note: The C{fd} is used rather than C{print} to facilitate unit testing.
    """
    fd.write("\n")
    # The synopsis uses the same argument spelling as the argument list below.
    fd.write(" Usage: cback-amazons3-sync [switches] sourceDir s3BucketUrl\n")
    fd.write("\n")
    fd.write(" Cedar Backup Amazon S3 sync tool.\n")
    fd.write("\n")
    fd.write(" This Cedar Backup utility synchronizes a local directory to an Amazon S3\n")
    fd.write(" bucket. After the sync is complete, a validation step is taken. An\n")
    fd.write(" error is reported if the contents of the bucket do not match the\n")
    fd.write(" source directory, or if the indicated size for any file differs.\n")
    fd.write(" This tool is a wrapper over the AWS CLI command-line tool.\n")
    fd.write("\n")
    fd.write(" The following arguments are required:\n")
    fd.write("\n")
    fd.write(" sourceDir The local source directory on disk (must exist)\n")
    fd.write(" s3BucketUrl The URL to the target Amazon S3 bucket\n")
    fd.write("\n")
    fd.write(" The following switches are accepted:\n")
    fd.write("\n")
    fd.write(" -h, --help Display this usage/help listing\n")
    fd.write(" -V, --version Display version information\n")
    fd.write(" -b, --verbose Print verbose output as well as logging to disk\n")
    fd.write(" -q, --quiet Run quietly (display no output to the screen)\n")
    # The defaults shown for the logfile switches come from the module-level constants.
    fd.write(" -l, --logfile Path to logfile (default: %s)\n" % DEFAULT_LOGFILE)
    fd.write(" -o, --owner Logfile ownership, user:group (default: %s:%s)\n" % (DEFAULT_OWNERSHIP[0], DEFAULT_OWNERSHIP[1]))
    fd.write(" -m, --mode Octal logfile permissions mode (default: %o)\n" % DEFAULT_MODE)
    fd.write(" -O, --output Record some sub-command (i.e. aws) output to the log\n")
    fd.write(" -d, --debug Write debugging information to the log (implies --output)\n")
    fd.write(" -s, --stack Dump Python stack trace instead of swallowing exceptions\n") # exactly 80 characters in width!
    fd.write(" -D, --diagnostics Print runtime diagnostics to the screen and exit\n")
    fd.write(" -v, --verifyOnly Only verify the S3 bucket contents, do not make changes\n")
    fd.write(" -w, --ignoreWarnings Ignore warnings about problematic filename encodings\n")
    fd.write("\n")
    fd.write(" Typical usage would be something like:\n")
    fd.write("\n")
    fd.write(" cback-amazons3-sync /home/myuser s3://example.com-backup/myuser\n")
    fd.write("\n")
    fd.write(" This will sync the contents of /home/myuser into the indicated bucket.\n")
    fd.write("\n")
######################
# _version() function
######################
def _version(fd=sys.stdout):
    """
    Writes the version banner for the Amazon S3 sync tool.
    @param fd: Object with a C{write} method that receives the banner, one call per line.
    @note: Output goes through C{fd} instead of C{print} so that unit tests can capture it.
    """
    banner = (
        "\n",
        " Cedar Backup Amazon S3 sync tool.\n",
        " Included with Cedar Backup version %s, released %s.\n" % (VERSION, DATE),
        "\n",
        " Copyright (c) %s %s <%s>.\n" % (COPYRIGHT, AUTHOR, EMAIL),
        " See CREDITS for a list of included code and other contributors.\n",
        " This is free software; there is NO warranty. See the\n",
        " GNU General Public License version 2 for copying conditions.\n",
        "\n",
        " Use the --help option for usage information.\n",
        "\n",
    )
    for line in banner:
        fd.write(line)
##########################
# _diagnostics() function
##########################
def _diagnostics(fd=sys.stdout):
    """
    Writes a runtime diagnostics report.
    @param fd: Object with a C{write} method that receives the report.
    @note: Output goes through C{fd} instead of C{print} so that unit tests can capture it.
    """
    # Heading, surrounded by blank lines.
    for heading in ("\n", "Diagnostics:\n", "\n"):
        fd.write(heading)
    # The diagnostics object prints its own values to the same destination.
    report = Diagnostics()
    report.printDiagnostics(fd=fd, prefix=" ")
    fd.write("\n")
############################
# _executeAction() function
############################
def _executeAction(options):
    """
    Runs the cback-amazons3-sync workflow: list, check, synchronize, verify.
    @param options: Program command-line options.
    @type options: Options object.
    @raise Exception: Under many generic error conditions
    """
    localDir = options.sourceDir
    fileList = _buildSourceFiles(localDir)
    # The filename encoding check is skipped when warnings are being ignored.
    if not options.ignoreWarnings:
        _checkSourceFiles(localDir, fileList)
    bucketUrl = options.s3BucketUrl
    # In verify-only mode the bucket is left untouched and only compared.
    if not options.verifyOnly:
        _synchronizeBucket(localDir, bucketUrl)
    _verifyBucketContents(localDir, fileList, bucketUrl)
################################
# _buildSourceFiles() function
################################
def _buildSourceFiles(sourceDir):
    """
    Collect the contents of a source directory into a filesystem list.
    @param sourceDir: Local source directory
    @return: FilesystemList with contents of source directory
    @raise ValueError: If the source directory is not an existing directory
    """
    if os.path.isdir(sourceDir):
        contents = FilesystemList()
        contents.addDirContents(sourceDir)
        return contents
    raise ValueError("Source directory does not exist on disk.")
###############################
# _checkSourceFiles() function
###############################
def _hasEncodingProblem(entry, detected, encoding):
    """
    Decide whether a single path looks inconsistent with the target encoding.
    @param entry: Path, as found in the source file list
    @param detected: Encoding name guessed for the path, or C{None} if there was no guess
    @param encoding: Encoding that the path is expected to be valid in
    @return: True if the path should be reported as a problem, False otherwise
    """
    if detected is None:
        return True # Without a guessed encoding the name cannot be decoded at all
    try:
        source = entry.decode(detected)
    except (UnicodeDecodeError, LookupError):
        # Either the guess does not actually decode the name, or Python has no
        # codec under that name; both mean the name cannot be validated.
        return True
    try:
        target = source.encode(encoding)
    except UnicodeEncodeError:
        return True
    return source != target

def _checkSourceFiles(sourceDir, sourceFiles):
    """
    Check source files, trying to guess which ones will have encoding problems.

    Every entry is decoded using the encoding guessed by C{chardet} and then
    re-encoded using the encoding reported by C{Diagnostics}. An entry whose
    encoding cannot be guessed, that does not decode or re-encode cleanly, or
    that compares unequal after the round trip is logged as a problem.

    @param sourceDir: Local source directory (not used by the check itself)
    @param sourceFiles: Filesystem list containing contents of source directory
    @raises ValueError: If a problem file is found
    @see U{http://opensourcehacker.com/2011/09/16/fix-linux-filename-encodings-with-python/}
    @see U{http://serverfault.com/questions/82821/how-to-tell-the-language-encoding-of-a-filename-on-linux}
    @see U{http://randysofia.com/2014/06/06/aws-cli-and-your-locale/}
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore") # So we don't print unicode warnings from comparisons
        encoding = Diagnostics().encoding
        failed = False
        for entry in sourceFiles:
            detected = chardet.detect(entry)["encoding"]
            if _hasEncodingProblem(entry, detected, encoding):
                logger.error("Inconsistent encoding for [%s]: got %s, but need %s", entry, detected, encoding)
                failed = True
        if not failed:
            logger.info("Completed checking source filename encoding (no problems found).")
        else:
            logger.error("Some filenames have inconsistent encodings and will likely cause sync problems.")
            logger.error("You may be able to fix this by setting a more sensible locale in your environment.")
            logger.error("Alternately, you can rename the problem files to be valid in the indicated locale.")
            logger.error("To ignore this warning and proceed anyway, use --ignoreWarnings")
            raise ValueError("Some filenames have inconsistent encodings and will likely cause sync problems.")
################################
# _synchronizeBucket() function
################################
def _synchronizeBucket(sourceDir, s3BucketUrl):
    """
    Push a local directory up to an Amazon S3 bucket using the AWS CLI.
    @param sourceDir: Local source directory
    @param s3BucketUrl: Target S3 bucket URL
    @raise IOError: If the AWS CLI command returns a non-zero status
    """
    logger.info("Synchronizing local source directory up to Amazon S3.")
    command = [ "s3", "sync", sourceDir, s3BucketUrl, ]
    command.extend([ "--delete", "--recursive", ])
    # Only the status (first element of the result) is needed; output is not captured.
    status = executeCommand(AWS_COMMAND, command, returnOutput=False)[0]
    if status == 0:
        return
    raise IOError("Error [%d] calling AWS CLI synchronize bucket." % status)
###################################
# _verifyBucketContents() function
###################################
def _relativeKey(path, root):
    """
    Strip C{root} from the start of C{path} only, then drop any leading slashes.
    @param path: Full S3 key or local file path
    @param root: S3 prefix or local source directory that the path lives under
    @return: Path relative to the root, without a leading slash
    """
    if root and path.startswith(root):
        path = path[len(root):]
    return path.lstrip("/")

def _verifyBucketContents(sourceDir, sourceFiles, s3BucketUrl):
    """
    Verify that a source directory is equivalent to an Amazon S3 bucket.

    The bucket is listed through the AWS CLI, and every regular file in
    C{sourceFiles} must appear in that listing with the same size. Objects
    that exist only in the bucket are not reported.

    @param sourceDir: Local source directory
    @param sourceFiles: Filesystem list containing contents of source directory
    @param s3BucketUrl: Target S3 bucket URL
    @raise IOError: If the AWS CLI command returns a non-zero status
    @raise ValueError: If a file is missing from the bucket or its size differs
    """
    # As of this writing, the documentation for the S3 API that we're using
    # below says that up to 1000 elements at a time are returned, and that we
    # have to manually handle pagination by looking for the IsTruncated element.
    # However, in practice, this is not true. I have been testing with
    # "aws-cli/1.4.4 Python/2.7.3 Linux/3.2.0-4-686-pae", installed through PIP.
    # No matter how many items exist in my bucket and prefix, I get back a
    # single JSON result. I've tested with buckets containing nearly 6000
    # elements.
    #
    # If I turn on debugging, it's clear that underneath, something in the API
    # is executing multiple list-object requests against AWS, and stitching
    # results together to give me back the final JSON result. The debug output
    # clearly includes multiple requests, and each XML response (except for the
    # final one) contains <IsTruncated>true</IsTruncated>.
    #
    # This feature is not mentioned in the official changelog for any of the
    # releases going back to 1.0.0. It appears to happen in the botocore
    # library, but I'll admit I can't actually find the code that implements it.
    # For now, all I can do is rely on this behavior and hope that the
    # documentation is out-of-date. I'm not going to write code that tries to
    # parse out IsTruncated if I can't actually test that code.

    # Split "s3://bucket/prefix" into its parts; the prefix is optional, so a
    # bare "s3://bucket" yields an empty prefix instead of failing to unpack.
    location = s3BucketUrl
    if location.startswith("s3://"):
        location = location[len("s3://"):]
    (bucket, _, prefix) = location.partition("/")

    query = "Contents[].{Key: Key, Size: Size}"
    args = [ "s3api", "list-objects", "--bucket", bucket, ]
    if prefix:
        args += [ "--prefix", prefix, ]
    args += [ "--query", query, ]
    (result, data) = executeCommand(AWS_COMMAND, args, returnOutput=True)
    if result != 0:
        raise IOError("Error [%d] calling AWS CLI verify bucket contents." % result)

    # The query can evaluate to null (or to nothing at all) when no object
    # matches, so treat both as an empty listing rather than iterating None.
    output = "".join(data).strip()
    listing = json.loads(output) if output else None
    contents = { }
    for entry in listing or [ ]:
        # Only the leading prefix is removed, so a key that happens to contain
        # the prefix text again further along is left intact.
        contents[_relativeKey(entry["Key"], prefix)] = long(entry["Size"])

    failed = False
    for entry in sourceFiles:
        if not os.path.isfile(entry):
            continue # Only regular files are compared against the listing
        key = _relativeKey(entry, sourceDir)
        size = long(os.stat(entry).st_size)
        if key not in contents:
            logger.error("File was apparently not uploaded: [%s]", entry)
            failed = True
        elif size != contents[key]:
            logger.error("File size differs [%s]: expected %s bytes but got %s bytes", entry, size, contents[key])
            failed = True

    if not failed:
        logger.info("Completed verifying Amazon S3 bucket contents (no problems found).")
    else:
        logger.error("There were differences between source directory and target S3 bucket.")
        raise ValueError("There were differences between source directory and target S3 bucket.")
#########################################################################
# Main routine
########################################################################
if __name__ == "__main__":
    # When run as a script, call cli() and use its return value as the process exit status.
    sys.exit(cli())
|