This file is indexed.

/usr/share/psi/python/inputparser.py is in psi4-data 1:0.3-5.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
#
#@BEGIN LICENSE
#
# PSI4: an ab initio quantum chemistry software package
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
#@END LICENSE
#

## Force Python 3 print syntax, if this is python 2.X
#if sys.hexversion < 0x03000000:
from __future__ import print_function

"""Module with functions to parse the input file and convert
Psithon into standard Python. Particularly, forms psi4
module calls that access the C++ side of Psi4.

"""
import re
import os
import sys
import random
#CUimport psi4
import pubchem
#CUfrom psiexceptions import *
from p4xcpt import *  # CU


# inputfile contents to be preserved from the processor
literals = {}


def bad_option_syntax(line):
    """Function to report bad syntax to screen and output file."""
    message = ('Unsupported syntax:\n\n%s\n\n' % (line))
    raise TestComparisonError(message)


def process_word_quotes(matchobj):
    """Function to determine if argument needs wrapping in quotes as string."""
    dollar = matchobj.group(2)
    val = matchobj.group(3)
    if dollar:
        # This is a python variable, make sure that it starts with a letter
        if re.match(r'^[A-Za-z][\w]*', val):
            return val
        else:
            message = ("Invalid Python variable: %s" % (val))
            raise TestComparisonError(message)
    elif re.match(r'^-?\d+\.?\d*(?:[Ee]-?\d+)?$', val):
        # This must be a number, don't wrap it in quotes
        return val
    elif re.match(r'^\'.*\'$', val) or re.match(r'^\".*\"$', val):
        # This is already wrapped in quotes, do nothing
        return val
    else:
        # This must be a string
        return "\"%s\"" % (val)


def quotify(string):
    """Function to wrap anything that looks like a string in quotes
    and to remove leading dollar signs from python variables.

    """
    # This wraps anything that looks like a string in quotes, and removes leading
    # dollar signs from python variables
    wordre = re.compile(r'(([$]?)([-+()*.\w\"\'/\\]+))')
    string = wordre.sub(process_word_quotes, string)
    return string


def process_option(spaces, module, key, value, line):
    """Function to process a line with set or in a set block
    into global/local domain and keyword/value.

    """
    module = module.upper()
    key = key.upper()
    value = quotify(value.strip())

    if module == "GLOBALS" or module == "GLOBAL" or module == "" or module.isspace():
        # If it's really a global, we need slightly different syntax
        return "%spsi4.set_global_option(\"%s\", %s)\n" % (spaces, key, value)
    else:
        # It's a local option, so we need the module name in there too
        return "%spsi4.set_local_option(\"%s\", \"%s\", %s)\n" % (spaces, module, key, value)


def process_set_command(matchobj):
    """Function to process match of all individual ``set (module_list)
    key {[value_list] or $value or value}``.

    """
    result = ""
    module_string = ""
    if matchobj.group(2):
        module_string = matchobj.group(2)
    for module in module_string.split(","):
        result += process_option(matchobj.group(1), module, matchobj.group(3), matchobj.group(4), matchobj.group(0))
    return result


def process_set_commands(matchobj):
    """Function to process match of ``set name? { ... }``."""
    spaces = matchobj.group(1)
    commands = matchobj.group(3)
    command_lines = re.split('\n', commands)
    # Remove trailing newline from each line
    map(lambda x: x.strip(), command_lines)
    result = ""
    module_string = ""
    command = ""
    if matchobj.group(2):
        module_string = matchobj.group(2)
    for module in module_string.split(","):
        for line in command_lines:
            # Chomp the trailing newline and accumulate
            command += line
            if not check_parentheses_and_brackets(command, 0):
                # If the brackets don't match up, we need to move on to the next line
                # and keep going, until they do match. Only then do we process the command
                continue
            # Ignore blank/empty lines
            if not line or line.isspace():
                continue
            matchobj = re.match(r'^\s*(\w+)[\s=]+(.*?)$', command)
            # Is the syntax correct? If so, process the line
            if matchobj:
                result += process_option(spaces, module, matchobj.group(1), matchobj.group(2), command)
                # Reset the string
                command = ""
            else:
                bad_option_syntax(command)
    return result


def process_from_file_command(matchobj):
    """Function that process a match of ``from_file`` in molecule block."""
    string = matchobj.group(2)
    mol=psi4.mol_from_file(string,1)
    tempmol=[line for line in mol.split('\n') if line.strip() != '']
    mol2=set(tempmol)
    mol=""
    for i in mol2:
           mol+=i
           mol+="\n"
    return mol


def process_pubchem_command(matchobj):
    """Function to process match of ``pubchem`` in molecule block."""
    string = matchobj.group(2)
    if re.match(r'^\s*[0-9]+\s*$', string):
        # This is just a number - must be a CID
        pcobj = pubchem.PubChemObj(int(string), '', '')
        try:
            return pcobj.getMoleculeString()
        except Exception as e:
            return e.message
    else:
        # Search pubchem for the provided string
        try:
            results = pubchem.getPubChemResults(string)
        except Exception as e:
            return e.message

        # N.B. Anything starting with PubchemError will be handled correctly by the molecule parser
        # in libmints, which will just print the rest of the string and exit gracefully.
        if not results:
            # Nothing!
            return "PubchemError\n\tNo results were found when searching PubChem for %s.\n" % (string)
        elif len(results) == 1:
            # There's only 1 result - use it
            return results[0].getMoleculeString()
        else:
            # There are multiple results. Print and exit
            msg = "\tPubchemError\n"
            msg += "\tMultiple pubchem results were found. Replace\n\n\t\tpubchem:%s\n\n" % (string)
            msg += "\twith the Chemical ID number or exact name from one of the following and re-run.\n\n"
            msg += "\t Chemical ID     IUPAC Name\n\n"
            for result in results:
                msg += "%s" % (result)
                if result.name().lower() == string.lower():
                    #We've found an exact match!
                    return result.getMoleculeString()
            return msg


def process_molecule_command(matchobj):
    """Function to process match of ``molecule name? { ... }``."""
    spaces = matchobj.group(1)
    name = matchobj.group(2)
    geometry = matchobj.group(3)
    pubchemre = re.compile(r'^(\s*pubchem\s*:\s*(.*)\n)$', re.MULTILINE | re.IGNORECASE)
    geometry = pubchemre.sub(process_pubchem_command, geometry)
    from_filere = re.compile(r'^(\s*from_file\s*:\s*(.*)\n)$', re.MULTILINE | re.IGNORECASE)
    geometry = from_filere.sub(process_from_file_command,geometry)
    molecule = spaces

    molecule += 'psi4.efp_init()\n'  # clear EFP object before Molecule read in
    molecule += spaces

    if name != "":
        molecule += '%s = ' % (name)

    molecule += 'geometry("""%s"""' % (geometry)
    if name != "":
        molecule += ',"%s"' % (name)

    molecule += ")\n"
    molecule += '%spsi4.IO.set_default_namespace("%s")' % (spaces, name)

    return molecule


def process_literal_blocks(matchobj):
    """Function to process match of ``literals_psi4_yo-...``."""
    return literals[matchobj.group(1)]


def process_cfour_command(matchobj):
    """Function to process match of ``cfour name? { ... }``."""
    spaces = matchobj.group(1)
    name = matchobj.group(2)
    cfourblock = matchobj.group(3)

    literalkey = str(random.randint(0, 99999))
    literals[literalkey] = cfourblock
    return "%spsi4.set_global_option(\"%s\", \"\"\"%s\n\"\"\")\n" % \
        (spaces, 'LITERAL_CFOUR', 'literals_psi4_yo-' + literalkey)


def process_extract_command(matchobj):
    """Function to process match of ``extract_subsets``."""
    spaces = matchobj.group(1)
    name = matchobj.group(2)
    result = matchobj.group(0)
    result += '%s%s.set_name("%s")' % (spaces, name, name)
    result += "\n%spsi4.set_active_molecule(%s)" % (spaces, name)
    result += '\n%spsi4.IO.set_default_namespace("%s")' % (spaces, name)

    return result


def process_print_command(matchobj):
    """Function to process match of ``print`` and transform
    it to ``psi4.print_out()``.

    """
    spaces = matchobj.group(1)
    string = matchobj.group(2)

    return "%spsi4.print_out(str(%s))\n" % (spaces, str(string))


def process_memory_command(matchobj):
    """Function to process match of ``memory ...``."""
    spaces = str(matchobj.group(1))
    sig = str(matchobj.group(2))
    units = str(matchobj.group(3))

    val = float(sig)
    memory_amount = val

    if units.upper() == 'KB':
        memory_amount = val * 1000
    elif units.upper() == 'MB':
        memory_amount = val * 1000000
    elif units.upper() == 'GB':
        memory_amount = val * 1000000000

    return "%spsi4.set_memory(%d)\n" % (spaces, int(memory_amount))


def basname(name):
    """Imitates BasisSet.make_filename() without the gbs extension"""
    return name.lower().replace('+', 'p').replace('*', 's').replace('(', '_').replace(')', '_').replace(',', '_')


def process_basis_block(matchobj):
    """Function to process match of ``basis name? { ... }``."""
    spaces = matchobj.group(1)
    basistype = matchobj.group(2).upper()
    name = matchobj.group(3)
    name = ('anonymous' + str(random.randint(0, 999))) if name == '' else name
    cleanbas = basname(name).replace('-', '')  # further remove hyphens so can be function name
    command_lines = re.split('\n', matchobj.group(4))

    symbol_re = re.compile(r'^\s*assign\s+(?P<symbol>[A-Z]{1,3})\s+(?P<basis>[-*\(\)\w]+)\s*$', re.IGNORECASE)
    label_re = re.compile(r'^\s*assign\s+(?P<label>(?P<symbol>[A-Z]{1,3})(?:(_\w+)|(\d+))?)\s+(?P<basis>[-*\(\)\w]+)\s*$', re.IGNORECASE)
    all_re = re.compile(r'^\s*assign\s+(?P<basis>[-*\(\)\w]+)\s*$', re.IGNORECASE)
    basislabel = re.compile(r'\s*\[\s*([-*\(\)\w]+)\s*\]\s*')

    result = """%sdef basisspec_psi4_yo__%s(mol, role):\n""" % (spaces, cleanbas)
    result += """%s    basstrings = {}\n""" % (spaces)

    # Start by looking for assign lines, and remove them
    leftover_lines = []
    for line in command_lines:
        if symbol_re.match(line):
            m = symbol_re.match(line)
            result += """%s    mol.set_basis_by_symbol("%s", "%s", role=role)\n""" % \
                (spaces, m.group('symbol'), m.group('basis'))

        elif label_re.match(line):
            m = label_re.match(line)
            result += """%s    mol.set_basis_by_label("%s", "%s", role=role)\n""" % \
                (spaces, m.group('label'), m.group('basis'))

        elif all_re.match(line):
            m = all_re.match(line)
            result += """%s    mol.set_basis_all_atoms("%s", role=role)\n""" % \
                (spaces, m.group('basis'))

        else:
            # Ignore blank lines and accumulate remainder
            if line and not line.isspace():
                leftover_lines.append(line.strip())

    # Now look for regular basis set definitions
    basblock = filter(None, basislabel.split('\n'.join(leftover_lines)))
    if len(basblock) == 1:
        if len(result.split('\n')) == 3:
            # case with no [basname] markers where whole block is contents of gbs file
            result += """%s    mol.set_basis_all_atoms("%s", role=role)\n""" % \
                (spaces, name)
            result += """%s    basstrings['%s'] = \"\"\"\n%s\n\"\"\"\n""" % \
                (spaces, basname(name), basblock[0])
        else:
            message = ("Conflicting basis set specification: assign lines present but shells have no [basname] label.""")
            raise TestComparisonError(message)
    else:
        # case with specs separated by [basname] markers
        for idx in range(0, len(basblock), 2):
            result += """%s    basstrings['%s'] = \"\"\"\n%s\n\"\"\"\n""" % \
                (spaces, basname(basblock[idx]), basblock[idx + 1])

    result += """%s    return basstrings\n""" % (spaces)
    result += """%spsi4.set_global_option(\"%s\", \"%s\")""" % (spaces, basistype, name)
    return result


def process_pcm_command(matchobj):
    """Function to process match of ``pcm name? { ... }``."""
    spacing = str(matchobj.group(1)) # Ignore..
    name = str(matchobj.group(2)) # Ignore..
    block = str(matchobj.group(3))
    fp = open('pcmsolver.inp', 'w')
    fp.write(block)
    fp.close()
    import pcm_placeholder
    sys.path.append(pcm_placeholder.PCMSOLVER_PARSE_DIR)
    import pcmsolver
    pcmsolver.parse_pcm_input('pcmsolver.inp')
    return "" # The file has been written to disk; nothing needed in Psi4 input


def process_external_command(matchobj):
    """Function to process match of ``external name? { ... }``."""
    spaces = str(matchobj.group(1))
    name = str(matchobj.group(2))
    if not name or name.isspace():
        name = "extern"
    block = str(matchobj.group(3))
    lines = re.split('\n', block)

    extern = "%sqmmm = QMMM()\n" % (spaces)

    NUMBER = "((?:[-+]?\\d*\\.\\d+(?:[DdEe][-+]?\\d+)?)|(?:[-+]?\\d+\\.\\d*(?:[DdEe][-+]?\\d+)?))"

    # Comments are all removed by this point
    # 0. Remove blank lines
    re_blank = re.compile(r'^\s*$')
    lines2 = []
    for line in lines:
        mobj = re_blank.match(line)
        if mobj:
            pass
        else:
            lines2.append(line)
    lines = lines2

    # 1. Look for units [ang|bohr|au|a.u.] defaults to ang
    re_units = re.compile(r'^\s*units?[\s=]+((ang)|(angstrom)|(bohr)|(au)|(a\.u\.))$\s*', re.IGNORECASE)
    units = 'ang'
    lines2 = []
    for line in lines:
        mobj = re_units.match(line)
        if mobj:
            unit = mobj.group(1)
            if unit in ['bohr', 'au', 'a.u.']:
                units = 'bohr'
            else:
                units = 'ang'
        else:
            lines2.append(line)
    lines = lines2

    # 2. Look for basis basisname, defaults to cc-pvdz
    # 3. Look for df_basis_scf basisname, defaults to cc-pvdz-jkfit
    re_basis = re.compile(r'\s*basis[\s=]+(\S+)\s*$', re.IGNORECASE)
    re_df_basis = re.compile(r'\s*df_basis_scf[\s=]+(\S+)\s*$', re.IGNORECASE)
    basis = 'cc-pvdz'
    df_basis_scf = 'cc-pvdz-jkfit'
    lines2 = []
    for line in lines:
        mobj = re_basis.match(line)
        if mobj:
            basis = mobj.group(1)
        else:
            mobj = re_df_basis.match(line)
            if mobj:
                df_basis_scf = mobj.group(1)
            else:
                lines2.append(line)
    lines = lines2

    # 4. Look for charge lines Z x y z, convert according to unit convention
    charge_re = re.compile(r'^\s*' + NUMBER + r'\s+' + NUMBER + r'\s+' + NUMBER + r'\s+' + NUMBER + r'\s*$')
    lines2 = []
    for line in lines:
        mobj = charge_re.match(line)
        if mobj:
            if units == 'ang':
                extern += '%sqmmm.addChargeAngstrom(%s,%s,%s,%s)\n' % (spaces, mobj.group(1), mobj.group(2), mobj.group(3), mobj.group(4))
            if units == 'bohr':
                extern += '%sqmmm.addChargeBohr(%s,%s,%s,%s)\n' % (spaces, mobj.group(1), mobj.group(2), mobj.group(3), mobj.group(4))
        else:
            lines2.append(line)
    lines = lines2

    # 5. Look for diffuse regions, which are XYZ molecules seperated by the usual -- lines
    spacer_re = re.compile(r'^\s*--\s*$')
    frags = []
    frags.append([])
    for line in lines:
        mobj = spacer_re.match(line)
        if mobj:
            if len(frags[len(frags) - 1]):
                frags.append([])
        else:
            frags[len(frags) - 1].append(line)

    extern += '%sextern_mol_temp = psi4.get_active_molecule()\n' % (spaces)

    mol_re = re.compile(r'\s*\S+\s+' + NUMBER + r'\s+' + NUMBER + r'\s+' + NUMBER + r'\s*$')
    lines = []
    for frag in frags:

        if not len(frag):
            continue

        extern += '%sexternal_diffuse = geometry("""\n' % (spaces)
        extern += '%s0 1\n' % (spaces)

        for line in frag:
            if not mol_re.match(line):
                lines.append(line)
            else:
                extern += '%s%s\n' % (spaces, line)

        extern += '%sunits %s\n' % (spaces, units)
        extern += '%ssymmetry c1\n' % (spaces)
        extern += '%sno_reorient\n' % (spaces)
        extern += '%sno_com\n' % (spaces)
        extern += '%s""")\n' % (spaces)
        extern += "%sdiffuse = Diffuse(external_diffuse,'%s','%s')\n" % (spaces, basis, df_basis_scf)
        extern += '%sdiffuse.fitScf()\n' % (spaces)
        extern += '%sqmmm.addDiffuse(diffuse)\n' % (spaces)
        extern += '\n'

    extern += '%spsi4.set_active_molecule(extern_mol_temp)\n' % (spaces)

    # 6. If there is anything left, the user messed up
    if len(lines):
        print('Input parsing for external {}: Extra line(s) present:')
        for line in lines:
            raise TestComparisonError(line)

    # Return is actually an ExternalPotential, not a QMMM
    extern += '%sqmmm.populateExtern()\n' % (spaces)
    extern += '%s%s = qmmm.extern\n' % (spaces, name)

    extern += '%spsi4.set_global_option_python("EXTERN", extern)\n' % (spaces)

    return extern


def check_parentheses_and_brackets(input_string, exit_on_error):
    """Function to check that all parenthesis and brackets
    in *input_string* are paired. On that condition, *exit_on_error* =1,
    otherwise 0.

    """
    # This returns 1 if the string's all matched up, 0 otherwise
    import collections

    # create left to right parenthesis mappings
    lrmap = {"(": ")", "[": "]", "{": "}"}

    # derive sets of left and right parentheses
    lparens = set(lrmap.keys())
    rparens = set(lrmap.values())

    parenstack = collections.deque()
    all_matched = 1
    for ch in input_string:
        if ch in lparens:
            parenstack.append(ch)
        elif ch in rparens:
            opench = ""
            try:
                opench = parenstack.pop()
            except IndexError:
                # Run out of opening parens
                all_matched = 0
                if exit_on_error:
                    message = ("Input error: extra %s" % (ch))
                    raise TestComparisonError(message)
            if lrmap[opench] != ch:
                # wrong type of parenthesis popped from stack
                all_matched = 0
                if exit_on_error:
                    message = ("Input error: %s closed with a %s" % (opench, ch))
                    raise TestComparisonError(message)
    if len(parenstack) != 0:
        all_matched = 0
        if exit_on_error:
            message = ("Input error: Unmatched %s" % (parenstack.pop()))
            raise TestComparisonError(message)

    return all_matched


def parse_multiline_array(input_list):
    """Function to squash multiline arrays into a single line
    until all parentheses and brackets are fully paired.

    """
    line = input_list.pop(0)
    # Keep adding lines to the current one, until all parens match up
    while not check_parentheses_and_brackets(line, 0):
        thisline = input_list.pop(0).strip()
        line += thisline
    return "%s\n" % (line)


def process_multiline_arrays(inputfile):
    """Function to find array inputs that are spread across multiple
    lines and squash them into a single line.

    """
    # This function takes multiline array inputs, and puts them on a single line
    # Start by converting the input to a list, splitting at newlines
    input_list = inputfile.split("\n")
    set_re = re.compile(r'^(\s*?)set\s+(?:([-,\w]+)\s+)?(\w+)[\s=]+\[.*', re.IGNORECASE)
    newinput = ""
    while len(input_list):
        line = input_list[0]
        if set_re.match(line):
            # We've found the start of a set matrix [ .... line - hand it off for more checks
            newinput += parse_multiline_array(input_list)
        else:
            # Nothing to do - just add the line to the string
            newinput += "%s\n" % (input_list.pop(0))
    return newinput


def process_input(raw_input, print_level=1):
    """Function to preprocess *raw input*, the text of the input file, then
    parse it, validate it for format, and convert it into legitimate Python.
    *raw_input* is printed to the output file unless *print_level* =0. Does
    a series of regular expression filters, where the matching portion of the
    input is replaced by the output of the corresponding function (in this
    module) call. Returns a string concatenating module import lines, a copy
    of the user's .psi4rc files, a setting of the scratch directory, a dummy
    molecule, and the processed *raw_input*.

    """
    # Check if the infile is actually an outfile (yeah we did)
    psi4_id = re.compile(r'PSI4: An Open-Source Ab Initio Electronic Structure Package')
    if re.search(psi4_id, raw_input):
        input_lines = raw_input.split("\n")
        input_re = re.compile(r'^\s*?\=\=> Input File <\=\=')
        input_start = -1
        for line_count in range(len(input_lines)):
            line = input_lines[line_count]
            if re.match(input_re, line):
                input_start = line_count + 3
                break

        stop_re = re.compile(r'^-{74}')
        input_stop = -1
        for line_count in range(input_start, len(input_lines)):
            line = input_lines[line_count]
            if re.match(stop_re, line):
                input_stop = line_count
                break

        if input_start == -1 or input_stop == -1:
            message = ('Cannot extract infile from outfile.')
            raise TestComparisonError(message)

        raw_input = '\n'.join(input_lines[input_start:input_stop])
        raw_input += '\n'

    # Echo the infile on the outfile
    if print_level > 0:
        psi4.print_out("\n  ==> Input File <==\n\n")
        psi4.print_out("--------------------------------------------------------------------------\n")
        psi4.print_out(raw_input)
        psi4.print_out("--------------------------------------------------------------------------\n")
        psi4.flush_outfile()

    #NOTE: If adding mulitline data to the preprocessor, use ONLY the following syntax:
    #   function [objname] { ... }
    #   which has the regex capture group:
    #
    #   r'^(\s*?)FUNCTION\s*(\w*?)\s*\{(.*?)\}', re.MULTILINE | re.DOTALL | re.IGNORECASE
    #
    #   your function is in capture group #1
    #   your objname is in capture group #2
    #   your data is in capture group #3

    # Sections that are truly to be taken literally (spaces included)
    #   Must be stored then subbed in the end to escape the normal processing

    # Process "cfour name? { ... }"
    cfour = re.compile(r'^(\s*?)cfour[=\s]*(\w*?)\s*\{(.*?)\}',
                          re.MULTILINE | re.DOTALL | re.IGNORECASE)
    temp = re.sub(cfour, process_cfour_command, raw_input)

    # Return from handling literal blocks to normal processing

    # Nuke all comments
    comment = re.compile(r'(^|[^\\])#.*')
    temp = re.sub(comment, '', temp)
    # Now, nuke any escapes from comment lines
    comment = re.compile(r'\\#')
    temp = re.sub(comment, '#', temp)

    # Check the brackets and parentheses match up, as long as this is not a pickle input file
    if not re.search(r'pickle_kw', temp):
        check_parentheses_and_brackets(temp, 1)

    # First, remove everything from lines containing only spaces
    blankline = re.compile(r'^\s*$')
    temp = re.sub(blankline, '', temp, re.MULTILINE)

    # Look for things like
    # set matrix [
    #              [ 1, 2 ],
    #              [ 3, 4 ]
    #            ]
    # and put them on a single line
    temp = process_multiline_arrays(temp)

    # Process all "set name? { ... }"
    set_commands = re.compile(r'^(\s*?)set\s*([-,\w]*?)[\s=]*\{(.*?)\}',
                              re.MULTILINE | re.DOTALL | re.IGNORECASE)
    temp = re.sub(set_commands, process_set_commands, temp)

    # Process all individual "set (module_list) key  {[value_list] or $value or value}"
    # N.B. We have to be careful here, because \s matches \n, leading to potential problems
    # with undesired multiline matches.  Better the double-negative [^\S\n] instead, which
    # will match any space, tab, etc., except a newline
    set_command = re.compile(r'^(\s*?)set\s+(?:([-,\w]+)[^\S\n]+)?(\w+)(?:[^\S\n]|=)+((\[.*\])|(\$?[-+,*()\.\w]+))\s*$',
                             re.MULTILINE | re.IGNORECASE)
    temp = re.sub(set_command, process_set_command, temp)

    # Process "molecule name? { ... }"
    molecule = re.compile(r'^(\s*?)molecule[=\s]*(\w*?)\s*\{(.*?)\}',
                          re.MULTILINE | re.DOTALL | re.IGNORECASE)
    temp = re.sub(molecule, process_molecule_command, temp)

    # Process "external name? { ... }"
    external = re.compile(r'^(\s*?)external[=\s]*(\w*?)\s*\{(.*?)\}',
                          re.MULTILINE | re.DOTALL | re.IGNORECASE)
    temp = re.sub(external, process_external_command, temp)

    # Process "pcm name? { ... }"
    pcm = re.compile(r'^(\s*?)pcm[=\s]*(\w*?)\s*\{(.*?)^\}',
                          re.MULTILINE | re.DOTALL | re.IGNORECASE)
    temp = re.sub(pcm, process_pcm_command, temp)

    # Then remove repeated newlines
    multiplenewlines = re.compile(r'\n+')
    temp = re.sub(multiplenewlines, '\n', temp)

    # Process " extract"
    extract = re.compile(r'(\s*?)(\w+)\s*=\s*\w+\.extract_subsets.*',
                         re.IGNORECASE)
    temp = re.sub(extract, process_extract_command, temp)

    # Process "print" and transform it to "psi4.print_out()"
    #print_string = re.compile(r'(\s*?)print\s+(.*)', re.IGNORECASE)
    #temp = re.sub(print_string, process_print_command, temp)

    # Process "memory ... "
    memory_string = re.compile(r'(\s*?)memory\s+([+-]?\d*\.?\d+)\s+([KMG]i?B)',
                               re.IGNORECASE)
    temp = re.sub(memory_string, process_memory_command, temp)

    # Process "basis name? { ... }"
    basis_block = re.compile(r'^(\s*?)(basis|df_basis_scf|df_basis_mp2|df_basis_cc|df_basis_sapt)[=\s]*(\w*?)\s*\{(.*?)\}',
                             re.MULTILINE | re.DOTALL | re.IGNORECASE)
    temp = re.sub(basis_block, process_basis_block, temp)

    # Process literal blocks by substituting back in
    lit_block = re.compile(r'literals_psi4_yo-(\d*\d)')
    temp = re.sub(lit_block, process_literal_blocks, temp)

    # imports
    imports = 'from psi4 import *\n'
    imports += 'from p4const import *\n'
    imports += 'from p4util import *\n'
    imports += 'from molutil import *\n'
#CU    imports += 'from driver import *\n'
#CU    imports += 'from wrappers import *\n'
#CU    imports += 'from wrappers_cfour import *\n'
#CU    imports += 'from gaussian_n import *\n'
    imports += 'from aliases import *\n'
#CU    imports += 'from functional import *\n'
#    imports += 'from qmmm import *\n'
    imports += 'psi4_io = psi4.IOManager.shared_object()\n'
    imports += 'psi4.efp_init()\n'  # initialize EFP object before Molecule read in

    # psirc (a baby PSIthon script that might live in ~/.psi4rc)
    psirc = ''
    homedir = os.path.expanduser('~')
    psirc_file = homedir + '/.psi4rc'
    if os.path.isfile(psirc_file):
        fh = open(psirc_file)
        psirc = fh.read()
        fh.close()

    # Override scratch directory if user specified via env_var
    scratch = ''
    scratch_env = psi4.Process.environment['PSI_SCRATCH']
    if len(scratch_env):
        scratch += 'psi4_io.set_default_path("%s")\n' % (scratch_env)

    blank_mol = 'geometry("""\n'
    blank_mol += '0 1\nH\nH 1 0.74\n'
    blank_mol += '""","blank_molecule_psi4_yo")\n'

    temp = imports + psirc + scratch + blank_mol + temp

    return temp

if __name__ == "__main__":
    result = process_input("""
molecule h2 {
H
H 1 R

R = .9
}

set basis 6-31G**

""")

    print("Result\n==========================")
    print(result)