This file is indexed.

/usr/share/doc/python-patsy-doc/html/quickstart.html is in python-patsy-doc 0.4.1-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">


<html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    
    <title>Quickstart &mdash; patsy 0.4.1 documentation</title>
    
    <link rel="stylesheet" href="_static/classic.css" type="text/css" />
    <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
    <link rel="stylesheet" href="_static/facebox.css" type="text/css" />
    
    <script type="text/javascript">
      var DOCUMENTATION_OPTIONS = {
        URL_ROOT:    './',
        VERSION:     '0.4.1',
        COLLAPSE_INDEX: false,
        FILE_SUFFIX: '.html',
        HAS_SOURCE:  true
      };
    </script>
    <script type="text/javascript" src="_static/jquery.js"></script>
    <script type="text/javascript" src="_static/underscore.js"></script>
    <script type="text/javascript" src="_static/doctools.js"></script>
    <script type="text/javascript" src="_static/show-code.js"></script>
    <script type="text/javascript" src="_static/facebox.js"></script>
    <link rel="top" title="patsy 0.4.1 documentation" href="index.html" />
    <link rel="next" title="How formulas work" href="formulas.html" />
    <link rel="prev" title="Overview" href="overview.html" /> 
  </head>
  <body role="document">
    <div class="related" role="navigation" aria-label="related navigation">
      <h3>Navigation</h3>
      <ul>
        <li class="right" style="margin-right: 10px">
          <a href="genindex.html" title="General Index"
             accesskey="I">index</a></li>
        <li class="right" >
          <a href="py-modindex.html" title="Python Module Index"
             >modules</a> |</li>
        <li class="right" >
          <a href="formulas.html" title="How formulas work"
             accesskey="N">next</a> |</li>
        <li class="right" >
          <a href="overview.html" title="Overview"
             accesskey="P">previous</a> |</li>
        <li class="nav-item nav-item-0"><a href="index.html">patsy 0.4.1 documentation</a> &raquo;</li> 
      </ul>
    </div>  

    <div class="document">
      <div class="documentwrapper">
        <div class="bodywrapper">
          <div class="body" role="main">
            
  <div class="section" id="quickstart">
<h1>Quickstart<a class="headerlink" href="#quickstart" title="Permalink to this headline">¶</a></h1>
<p>If you prefer to learn by diving in and getting your feet wet, then
here are some cut-and-pasteable examples to play with.</p>
<p>First, let&#8217;s import stuff and get some data to work with:</p>
<div class="highlight-ipython"><div class="highlight"><pre><span class="gp">In [1]: </span><span class="kn">import</span> <span class="nn">numpy</span> <span class="kn">as</span> <span class="nn">np</span>

<span class="gp">In [2]: </span><span class="kn">from</span> <span class="nn">patsy</span> <span class="kn">import</span> <span class="n">dmatrices</span><span class="p">,</span> <span class="n">dmatrix</span><span class="p">,</span> <span class="n">demo_data</span>

<span class="gp">In [3]: </span><span class="n">data</span> <span class="o">=</span> <span class="n">demo_data</span><span class="p">(</span><span class="s">&quot;a&quot;</span><span class="p">,</span> <span class="s">&quot;b&quot;</span><span class="p">,</span> <span class="s">&quot;x1&quot;</span><span class="p">,</span> <span class="s">&quot;x2&quot;</span><span class="p">,</span> <span class="s">&quot;y&quot;</span><span class="p">,</span> <span class="s">&quot;z column&quot;</span><span class="p">)</span>
</pre></div>
</div>
<p><a class="reference internal" href="API-reference.html#patsy.demo_data" title="patsy.demo_data"><code class="xref py py-func docutils literal"><span class="pre">demo_data()</span></code></a> gives us a mix of categorical and numerical
variables:</p>
<div class="highlight-ipython"><div class="highlight"><pre><span class="gp">In [4]: </span><span class="n">data</span>
<span class="gh">Out[4]: </span><span class="go"></span>
<span class="go">{&#39;a&#39;: [&#39;a1&#39;, &#39;a1&#39;, &#39;a2&#39;, &#39;a2&#39;, &#39;a1&#39;, &#39;a1&#39;, &#39;a2&#39;, &#39;a2&#39;],</span>
<span class="go"> &#39;b&#39;: [&#39;b1&#39;, &#39;b2&#39;, &#39;b1&#39;, &#39;b2&#39;, &#39;b1&#39;, &#39;b2&#39;, &#39;b1&#39;, &#39;b2&#39;],</span>
<span class="go"> &#39;x1&#39;: array([ 1.76405235,  0.40015721,  0.97873798,  2.2408932 ,  1.86755799,</span>
<span class="go">        -0.97727788,  0.95008842, -0.15135721]),</span>
<span class="go"> &#39;x2&#39;: array([-0.10321885,  0.4105985 ,  0.14404357,  1.45427351,  0.76103773,</span>
<span class="go">         0.12167502,  0.44386323,  0.33367433]),</span>
<span class="go"> &#39;y&#39;: array([ 1.49407907, -0.20515826,  0.3130677 , -0.85409574, -2.55298982,</span>
<span class="go">         0.6536186 ,  0.8644362 , -0.74216502]),</span>
<span class="go"> &#39;z column&#39;: array([ 2.26975462, -1.45436567,  0.04575852, -0.18718385,  1.53277921,</span>
<span class="go">         1.46935877,  0.15494743,  0.37816252])}</span>
</pre></div>
</div>
<p>Of course Patsy doesn&#8217;t much care what sort of object you store
your data in, so long as it can be indexed like a Python dictionary,
<code class="docutils literal"><span class="pre">data[varname]</span></code>. You may prefer to store your data in a <a class="reference external" href="http://pandas.pydata.org">pandas</a> DataFrame, or a numpy
record array... whatever makes you happy.</p>
<p>Now, let&#8217;s generate design matrices suitable for regressing <code class="docutils literal"><span class="pre">y</span></code> onto
<code class="docutils literal"><span class="pre">x1</span></code> and <code class="docutils literal"><span class="pre">x2</span></code>.</p>
<div class="highlight-ipython"><div class="highlight"><pre><span class="gp">In [5]: </span><span class="n">dmatrices</span><span class="p">(</span><span class="s">&quot;y ~ x1 + x2&quot;</span><span class="p">,</span> <span class="n">data</span><span class="p">)</span>
<span class="gh">Out[5]: </span><span class="go"></span>
<span class="go">(DesignMatrix with shape (8, 1)</span>
<span class="go">          y</span>
<span class="go">    1.49408</span>
<span class="go">   -0.20516</span>
<span class="go">    0.31307</span>
<span class="go">   -0.85410</span>
<span class="go">   -2.55299</span>
<span class="go">    0.65362</span>
<span class="go">    0.86444</span>
<span class="go">   -0.74217</span>
<span class="go">   Terms:</span>
<span class="go">     &#39;y&#39; (column 0),</span>
<span class="go"> DesignMatrix with shape (8, 3)</span>
<span class="go">   Intercept        x1        x2</span>
<span class="go">           1   1.76405  -0.10322</span>
<span class="go">           1   0.40016   0.41060</span>
<span class="go">           1   0.97874   0.14404</span>
<span class="go">           1   2.24089   1.45427</span>
<span class="go">           1   1.86756   0.76104</span>
<span class="go">           1  -0.97728   0.12168</span>
<span class="go">           1   0.95009   0.44386</span>
<span class="go">           1  -0.15136   0.33367</span>
<span class="go">   Terms:</span>
<span class="go">     &#39;Intercept&#39; (column 0)</span>
<span class="go">     &#39;x1&#39; (column 1)</span>
<span class="go">     &#39;x2&#39; (column 2))</span>
</pre></div>
</div>
<p>The return value is a Python tuple containing two DesignMatrix
objects, the first representing the left-hand side of our formula, and
the second representing the right-hand side. Notice that an intercept
term was automatically added to the right-hand side. These are just
ordinary numpy arrays with some extra metadata and a fancy __repr__
method attached, so we can pass them directly to a regression function
like <code class="xref py py-func docutils literal"><span class="pre">np.linalg.lstsq()</span></code>:</p>
<div class="highlight-ipython"><div class="highlight"><pre><span class="gp">In [6]: </span><span class="n">outcome</span><span class="p">,</span> <span class="n">predictors</span> <span class="o">=</span> <span class="n">dmatrices</span><span class="p">(</span><span class="s">&quot;y ~ x1 + x2&quot;</span><span class="p">,</span> <span class="n">data</span><span class="p">)</span>

<span class="gp">In [7]: </span><span class="n">betas</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linalg</span><span class="o">.</span><span class="n">lstsq</span><span class="p">(</span><span class="n">predictors</span><span class="p">,</span> <span class="n">outcome</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">ravel</span><span class="p">()</span>

<span class="gp">In [8]: </span><span class="k">for</span> <span class="n">name</span><span class="p">,</span> <span class="n">beta</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">predictors</span><span class="o">.</span><span class="n">design_info</span><span class="o">.</span><span class="n">column_names</span><span class="p">,</span> <span class="n">betas</span><span class="p">):</span>
<span class="gp">   ...: </span>    <span class="k">print</span><span class="p">(</span><span class="s">&quot;</span><span class="si">%s</span><span class="s">: </span><span class="si">%s</span><span class="s">&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">beta</span><span class="p">))</span>
<span class="gp">   ...: </span>
<span class="go">Intercept: 0.579662344123</span>
<span class="go">x1: 0.0885991903554</span>
<span class="go">x2: -1.76479205551</span>
</pre></div>
</div>
<p>Of course the resulting numbers aren&#8217;t very interesting, since this is just
random data.</p>
<p>If you just want the design matrix alone, without the <code class="docutils literal"><span class="pre">y</span></code> values,
use <a class="reference internal" href="API-reference.html#patsy.dmatrix" title="patsy.dmatrix"><code class="xref py py-func docutils literal"><span class="pre">dmatrix()</span></code></a> and leave off the <code class="docutils literal"><span class="pre">y</span> <span class="pre">~</span></code> part at the beginning:</p>
<div class="highlight-ipython"><div class="highlight"><pre><span class="gp">In [9]: </span><span class="n">dmatrix</span><span class="p">(</span><span class="s">&quot;x1 + x2&quot;</span><span class="p">,</span> <span class="n">data</span><span class="p">)</span>
<span class="gh">Out[9]: </span><span class="go"></span>
<span class="go">DesignMatrix with shape (8, 3)</span>
<span class="go">  Intercept        x1        x2</span>
<span class="go">          1   1.76405  -0.10322</span>
<span class="go">          1   0.40016   0.41060</span>
<span class="go">          1   0.97874   0.14404</span>
<span class="go">          1   2.24089   1.45427</span>
<span class="go">          1   1.86756   0.76104</span>
<span class="go">          1  -0.97728   0.12168</span>
<span class="go">          1   0.95009   0.44386</span>
<span class="go">          1  -0.15136   0.33367</span>
<span class="go">  Terms:</span>
<span class="go">    &#39;Intercept&#39; (column 0)</span>
<span class="go">    &#39;x1&#39; (column 1)</span>
<span class="go">    &#39;x2&#39; (column 2)</span>
</pre></div>
</div>
<p>We&#8217;ll use dmatrix for the rest of the examples, since seeing the
outcome matrix over and over would get boring. This matrix&#8217;s metadata
is stored in an extra attribute called <code class="docutils literal"><span class="pre">.design_info</span></code>, which is a
<a class="reference internal" href="API-reference.html#patsy.DesignInfo" title="patsy.DesignInfo"><code class="xref py py-class docutils literal"><span class="pre">DesignInfo</span></code></a> object you can explore at your leisure:</p>
<div class="highlight-ipython"><div class="highlight"><pre><span class="gp">In [10]: </span><span class="n">d</span> <span class="o">=</span> <span class="n">dmatrix</span><span class="p">(</span><span class="s">&quot;x1 + x2&quot;</span><span class="p">,</span> <span class="n">data</span><span class="p">)</span>

<span class="gp">In [11]: </span><span class="n">d</span><span class="o">.</span><span class="n">design_info</span><span class="o">.&lt;</span><span class="n">TAB</span><span class="o">&gt;</span>
<span class="go">d.design_info.builder              d.design_info.slice</span>
<span class="go">d.design_info.column_name_indexes  d.design_info.term_name_slices</span>
<span class="go">d.design_info.column_names         d.design_info.term_names</span>
<span class="go">d.design_info.describe             d.design_info.term_slices</span>
<span class="go">d.design_info.linear_constraint    d.design_info.terms</span>
</pre></div>
</div>
<p>Usually the intercept is useful, but if we don&#8217;t want it we can get
rid of it:</p>
<div class="highlight-ipython"><div class="highlight"><pre><span class="gp">In [12]: </span><span class="n">dmatrix</span><span class="p">(</span><span class="s">&quot;x1 + x2 - 1&quot;</span><span class="p">,</span> <span class="n">data</span><span class="p">)</span>
<span class="gh">Out[12]: </span><span class="go"></span>
<span class="go">DesignMatrix with shape (8, 2)</span>
<span class="go">        x1        x2</span>
<span class="go">   1.76405  -0.10322</span>
<span class="go">   0.40016   0.41060</span>
<span class="go">   0.97874   0.14404</span>
<span class="go">   2.24089   1.45427</span>
<span class="go">   1.86756   0.76104</span>
<span class="go">  -0.97728   0.12168</span>
<span class="go">   0.95009   0.44386</span>
<span class="go">  -0.15136   0.33367</span>
<span class="go">  Terms:</span>
<span class="go">    &#39;x1&#39; (column 0)</span>
<span class="go">    &#39;x2&#39; (column 1)</span>
</pre></div>
</div>
<p>We can transform variables using arbitrary Python code:</p>
<div class="highlight-ipython"><div class="highlight"><pre><span class="gp">In [13]: </span><span class="n">dmatrix</span><span class="p">(</span><span class="s">&quot;x1 + np.log(x2 + 10)&quot;</span><span class="p">,</span> <span class="n">data</span><span class="p">)</span>
<span class="gh">Out[13]: </span><span class="go"></span>
<span class="go">DesignMatrix with shape (8, 3)</span>
<span class="go">  Intercept        x1  np.log(x2 + 10)</span>
<span class="go">          1   1.76405          2.29221</span>
<span class="go">          1   0.40016          2.34282</span>
<span class="go">          1   0.97874          2.31689</span>
<span class="go">          1   2.24089          2.43836</span>
<span class="go">          1   1.86756          2.37593</span>
<span class="go">          1  -0.97728          2.31468</span>
<span class="go">          1   0.95009          2.34601</span>
<span class="go">          1  -0.15136          2.33541</span>
<span class="go">  Terms:</span>
<span class="go">    &#39;Intercept&#39; (column 0)</span>
<span class="go">    &#39;x1&#39; (column 1)</span>
<span class="go">    &#39;np.log(x2 + 10)&#39; (column 2)</span>
</pre></div>
</div>
<p>Notice that <code class="docutils literal"><span class="pre">np.log</span></code> is being pulled out of the environment where
<a class="reference internal" href="API-reference.html#patsy.dmatrix" title="patsy.dmatrix"><code class="xref py py-func docutils literal"><span class="pre">dmatrix()</span></code></a> was called &#8211; <code class="docutils literal"><span class="pre">np.log</span></code> is accessible because we did
<code class="docutils literal"><span class="pre">import</span> <span class="pre">numpy</span> <span class="pre">as</span> <span class="pre">np</span></code> up above. Any functions or variables that you
could reference when calling <a class="reference internal" href="API-reference.html#patsy.dmatrix" title="patsy.dmatrix"><code class="xref py py-func docutils literal"><span class="pre">dmatrix()</span></code></a> can also be used inside
the formula passed to <a class="reference internal" href="API-reference.html#patsy.dmatrix" title="patsy.dmatrix"><code class="xref py py-func docutils literal"><span class="pre">dmatrix()</span></code></a>. For example:</p>
<div class="highlight-ipython"><div class="highlight"><pre><span class="gp">In [14]: </span><span class="n">new_x2</span> <span class="o">=</span> <span class="n">data</span><span class="p">[</span><span class="s">&quot;x2&quot;</span><span class="p">]</span> <span class="o">*</span> <span class="mi">100</span>

<span class="gp">In [15]: </span><span class="n">dmatrix</span><span class="p">(</span><span class="s">&quot;new_x2&quot;</span><span class="p">)</span>
<span class="gh">Out[15]: </span><span class="go"></span>
<span class="go">DesignMatrix with shape (8, 2)</span>
<span class="go">  Intercept     new_x2</span>
<span class="go">          1  -10.32189</span>
<span class="go">          1   41.05985</span>
<span class="go">          1   14.40436</span>
<span class="go">          1  145.42735</span>
<span class="go">          1   76.10377</span>
<span class="go">          1   12.16750</span>
<span class="go">          1   44.38632</span>
<span class="go">          1   33.36743</span>
<span class="go">  Terms:</span>
<span class="go">    &#39;Intercept&#39; (column 0)</span>
<span class="go">    &#39;new_x2&#39; (column 1)</span>
</pre></div>
</div>
<p>Patsy has some transformation functions &#8220;built in&#8221;, that are
automatically accessible to your code:</p>
<div class="highlight-ipython"><div class="highlight"><pre><span class="gp">In [16]: </span><span class="n">dmatrix</span><span class="p">(</span><span class="s">&quot;center(x1) + standardize(x2)&quot;</span><span class="p">,</span> <span class="n">data</span><span class="p">)</span>
<span class="gh">Out[16]: </span><span class="go"></span>
<span class="go">DesignMatrix with shape (8, 3)</span>
<span class="go">  Intercept  center(x1)  standardize(x2)</span>
<span class="go">          1     0.87995         -1.21701</span>
<span class="go">          1    -0.48395         -0.07791</span>
<span class="go">          1     0.09463         -0.66885</span>
<span class="go">          1     1.35679          2.23584</span>
<span class="go">          1     0.98345          0.69899</span>
<span class="go">          1    -1.86138         -0.71844</span>
<span class="go">          1     0.06598         -0.00417</span>
<span class="go">          1    -1.03546         -0.24845</span>
<span class="go">  Terms:</span>
<span class="go">    &#39;Intercept&#39; (column 0)</span>
<span class="go">    &#39;center(x1)&#39; (column 1)</span>
<span class="go">    &#39;standardize(x2)&#39; (column 2)</span>
</pre></div>
</div>
<p>See <a class="reference internal" href="builtins-reference.html#module-patsy.builtins" title="patsy.builtins"><code class="xref py py-mod docutils literal"><span class="pre">patsy.builtins</span></code></a> for a complete list of functions made
available to formulas. You can also define your own transformation
functions in the ordinary Python way:</p>
<div class="highlight-ipython"><div class="highlight"><pre><span class="gp">In [17]: </span><span class="k">def</span> <span class="nf">double</span><span class="p">(</span><span class="n">x</span><span class="p">):</span>
<span class="gp">   ....: </span>    <span class="k">return</span> <span class="mi">2</span> <span class="o">*</span> <span class="n">x</span>
<span class="gp">   ....: </span>

<span class="gp">In [18]: </span><span class="n">dmatrix</span><span class="p">(</span><span class="s">&quot;x1 + double(x1)&quot;</span><span class="p">,</span> <span class="n">data</span><span class="p">)</span>
<span class="gh">Out[18]: </span><span class="go"></span>
<span class="go">DesignMatrix with shape (8, 3)</span>
<span class="go">  Intercept        x1  double(x1)</span>
<span class="go">          1   1.76405     3.52810</span>
<span class="go">          1   0.40016     0.80031</span>
<span class="go">          1   0.97874     1.95748</span>
<span class="go">          1   2.24089     4.48179</span>
<span class="go">          1   1.86756     3.73512</span>
<span class="go">          1  -0.97728    -1.95456</span>
<span class="go">          1   0.95009     1.90018</span>
<span class="go">          1  -0.15136    -0.30271</span>
<span class="go">  Terms:</span>
<span class="go">    &#39;Intercept&#39; (column 0)</span>
<span class="go">    &#39;x1&#39; (column 1)</span>
<span class="go">    &#39;double(x1)&#39; (column 2)</span>
</pre></div>
</div>
<p>This flexibility does create problems in one case, though &#8211; because
we interpret whatever you write in-between the <code class="docutils literal"><span class="pre">+</span></code> signs as Python
code, you do in fact have to write valid Python code. And this can be
tricky if your variable names have funny characters in them, like
whitespace or punctuation. Fortunately, Patsy has a built-in
&#8220;transformation&#8221; called <a class="reference internal" href="builtins-reference.html#patsy.builtins.Q" title="patsy.builtins.Q"><code class="xref py py-func docutils literal"><span class="pre">Q()</span></code></a> that lets you &#8220;quote&#8221; such
variables:</p>
<div class="highlight-ipython"><div class="highlight"><pre><span class="gp">In [19]: </span><span class="n">weird_data</span> <span class="o">=</span> <span class="n">demo_data</span><span class="p">(</span><span class="s">&quot;weird column!&quot;</span><span class="p">,</span> <span class="s">&quot;x1&quot;</span><span class="p">)</span>

<span class="go"># This is an error...</span>
<span class="gp">In [20]: </span><span class="n">dmatrix</span><span class="p">(</span><span class="s">&quot;weird column! + x1&quot;</span><span class="p">,</span> <span class="n">weird_data</span><span class="p">)</span>
<span class="go">[...]</span>
<span class="go">PatsyError: error tokenizing input (maybe an unclosed string?)</span>
<span class="go">    weird column! + x1</span>
<span class="go">                ^</span>

<span class="go"># ...but this works:</span>
<span class="gp">In [21]: </span><span class="n">dmatrix</span><span class="p">(</span><span class="s">&quot;Q(&#39;weird column!&#39;) + x1&quot;</span><span class="p">,</span> <span class="n">weird_data</span><span class="p">)</span>
<span class="gh">Out[21]: </span><span class="go"></span>
<span class="go">DesignMatrix with shape (5, 3)</span>
<span class="go">  Intercept  Q(&#39;weird column!&#39;)        x1</span>
<span class="go">          1             1.76405  -0.97728</span>
<span class="go">          1             0.40016   0.95009</span>
<span class="go">          1             0.97874  -0.15136</span>
<span class="go">          1             2.24089  -0.10322</span>
<span class="go">          1             1.86756   0.41060</span>
<span class="go">  Terms:</span>
<span class="go">    &#39;Intercept&#39; (column 0)</span>
<span class="go">    &quot;Q(&#39;weird column!&#39;)&quot; (column 1)</span>
<span class="go">    &#39;x1&#39; (column 2)</span>
</pre></div>
</div>
<p><a class="reference internal" href="builtins-reference.html#patsy.builtins.Q" title="patsy.builtins.Q"><code class="xref py py-func docutils literal"><span class="pre">Q()</span></code></a> even plays well with other transformations:</p>
<div class="highlight-ipython"><div class="highlight"><pre><span class="gp">In [22]: </span><span class="n">dmatrix</span><span class="p">(</span><span class="s">&quot;double(Q(&#39;weird column!&#39;)) + x1&quot;</span><span class="p">,</span> <span class="n">weird_data</span><span class="p">)</span>
<span class="gh">Out[22]: </span><span class="go"></span>
<span class="go">DesignMatrix with shape (5, 3)</span>
<span class="go">  Intercept  double(Q(&#39;weird column!&#39;))        x1</span>
<span class="go">          1                     3.52810  -0.97728</span>
<span class="go">          1                     0.80031   0.95009</span>
<span class="go">          1                     1.95748  -0.15136</span>
<span class="go">          1                     4.48179  -0.10322</span>
<span class="go">          1                     3.73512   0.41060</span>
<span class="go">  Terms:</span>
<span class="go">    &#39;Intercept&#39; (column 0)</span>
<span class="go">    &quot;double(Q(&#39;weird column!&#39;))&quot; (column 1)</span>
<span class="go">    &#39;x1&#39; (column 2)</span>
</pre></div>
</div>
<p>Arithmetic transformations are also possible, but you&#8217;ll need to
&#8220;protect&#8221; them by wrapping them in <a class="reference internal" href="builtins-reference.html#patsy.builtins.I" title="patsy.builtins.I"><code class="xref py py-func docutils literal"><span class="pre">I()</span></code></a>, so that Patsy knows
that you really do want <code class="docutils literal"><span class="pre">+</span></code> to mean addition:</p>
<div class="highlight-ipython"><div class="highlight"><pre><span class="gp">In [23]: </span><span class="n">dmatrix</span><span class="p">(</span><span class="s">&quot;I(x1 + x2)&quot;</span><span class="p">,</span> <span class="n">data</span><span class="p">)</span>  <span class="c"># compare to &quot;x1 + x2&quot;</span>
<span class="gh">Out[23]: </span><span class="go"></span>
<span class="go">DesignMatrix with shape (8, 2)</span>
<span class="go">  Intercept  I(x1 + x2)</span>
<span class="go">          1     1.66083</span>
<span class="go">          1     0.81076</span>
<span class="go">          1     1.12278</span>
<span class="go">          1     3.69517</span>
<span class="go">          1     2.62860</span>
<span class="go">          1    -0.85560</span>
<span class="go">          1     1.39395</span>
<span class="go">          1     0.18232</span>
<span class="go">  Terms:</span>
<span class="go">    &#39;Intercept&#39; (column 0)</span>
<span class="go">    &#39;I(x1 + x2)&#39; (column 1)</span>
</pre></div>
</div>
<p>Note that while Patsy goes to considerable efforts to take in data
represented using different Python data types and convert them into a
standard representation, all this work happens <em>after</em> any
transformations you perform as part of your formula. So, for example,
if your data is in the form of numpy arrays, &#8220;+&#8221; will perform
element-wise addition, but if it is in standard Python lists, it will
perform concatenation:</p>
<div class="highlight-ipython"><div class="highlight"><pre><span class="gp">In [24]: </span><span class="n">dmatrix</span><span class="p">(</span><span class="s">&quot;I(x1 + x2)&quot;</span><span class="p">,</span> <span class="p">{</span><span class="s">&quot;x1&quot;</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">]),</span> <span class="s">&quot;x2&quot;</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">])})</span>
<span class="gh">Out[24]: </span><span class="go"></span>
<span class="go">DesignMatrix with shape (3, 2)</span>
<span class="go">  Intercept  I(x1 + x2)</span>
<span class="go">          1           5</span>
<span class="go">          1           7</span>
<span class="go">          1           9</span>
<span class="go">  Terms:</span>
<span class="go">    &#39;Intercept&#39; (column 0)</span>
<span class="go">    &#39;I(x1 + x2)&#39; (column 1)</span>

<span class="gp">In [25]: </span><span class="n">dmatrix</span><span class="p">(</span><span class="s">&quot;I(x1 + x2)&quot;</span><span class="p">,</span> <span class="p">{</span><span class="s">&quot;x1&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="s">&quot;x2&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">]})</span>
<span class="gh">Out[25]: </span><span class="go"></span>
<span class="go">DesignMatrix with shape (6, 2)</span>
<span class="go">  Intercept  I(x1 + x2)</span>
<span class="go">          1           1</span>
<span class="go">          1           2</span>
<span class="go">          1           3</span>
<span class="go">          1           4</span>
<span class="go">          1           5</span>
<span class="go">          1           6</span>
<span class="go">  Terms:</span>
<span class="go">    &#39;Intercept&#39; (column 0)</span>
<span class="go">    &#39;I(x1 + x2)&#39; (column 1)</span>
</pre></div>
</div>
<p>Patsy becomes particularly useful when you have categorical
data. If you use a predictor that has a categorical type (e.g. strings
or bools), it will be automatically coded. Patsy automatically
chooses an appropriate way to code categorical data to avoid
producing a redundant, overdetermined model.</p>
<p>If a categorical variable appears on its own, with no intercept in
the formula, the default is to dummy code it:</p>
<div class="highlight-ipython"><div class="highlight"><pre><span class="gp">In [26]: </span><span class="n">dmatrix</span><span class="p">(</span><span class="s">&quot;0 + a&quot;</span><span class="p">,</span> <span class="n">data</span><span class="p">)</span>
<span class="gh">Out[26]: </span><span class="go"></span>
<span class="go">DesignMatrix with shape (8, 2)</span>
<span class="go">  a[a1]  a[a2]</span>
<span class="go">      1      0</span>
<span class="go">      1      0</span>
<span class="go">      0      1</span>
<span class="go">      0      1</span>
<span class="go">      1      0</span>
<span class="go">      1      0</span>
<span class="go">      0      1</span>
<span class="go">      0      1</span>
<span class="go">  Terms:</span>
<span class="go">    &#39;a&#39; (columns 0:2)</span>
</pre></div>
</div>
<p>But if you did that and put the intercept back in, you&#8217;d get a
redundant model. So if the intercept is present, Patsy uses
a reduced-rank contrast code (treatment coding by default):</p>
<div class="highlight-ipython"><div class="highlight"><pre><span class="gp">In [27]: </span><span class="n">dmatrix</span><span class="p">(</span><span class="s">&quot;a&quot;</span><span class="p">,</span> <span class="n">data</span><span class="p">)</span>
<span class="gh">Out[27]: </span><span class="go"></span>
<span class="go">DesignMatrix with shape (8, 2)</span>
<span class="go">  Intercept  a[T.a2]</span>
<span class="go">          1        0</span>
<span class="go">          1        0</span>
<span class="go">          1        1</span>
<span class="go">          1        1</span>
<span class="go">          1        0</span>
<span class="go">          1        0</span>
<span class="go">          1        1</span>
<span class="go">          1        1</span>
<span class="go">  Terms:</span>
<span class="go">    &#39;Intercept&#39; (column 0)</span>
<span class="go">    &#39;a&#39; (column 1)</span>
</pre></div>
</div>
<p>The <code class="docutils literal"><span class="pre">T.</span></code> notation is there to remind you that these columns are
treatment coded.</p>
<p>Interactions are also easy &#8211; they represent the Cartesian product of
all the factors involved. Here&#8217;s a dummy coding of each <em>combination</em>
of values taken by <code class="docutils literal"><span class="pre">a</span></code> and <code class="docutils literal"><span class="pre">b</span></code>:</p>
<div class="highlight-ipython"><div class="highlight"><pre><span class="gp">In [28]: </span><span class="n">dmatrix</span><span class="p">(</span><span class="s">&quot;0 + a:b&quot;</span><span class="p">,</span> <span class="n">data</span><span class="p">)</span>
<span class="gh">Out[28]: </span><span class="go"></span>
<span class="go">DesignMatrix with shape (8, 4)</span>
<span class="go">  a[a1]:b[b1]  a[a2]:b[b1]  a[a1]:b[b2]  a[a2]:b[b2]</span>
<span class="go">            1            0            0            0</span>
<span class="go">            0            0            1            0</span>
<span class="go">            0            1            0            0</span>
<span class="go">            0            0            0            1</span>
<span class="go">            1            0            0            0</span>
<span class="go">            0            0            1            0</span>
<span class="go">            0            1            0            0</span>
<span class="go">            0            0            0            1</span>
<span class="go">  Terms:</span>
<span class="go">    &#39;a:b&#39; (columns 0:4)</span>
</pre></div>
</div>
<p>But interactions also know how to use contrast coding to avoid
redundancy. If you have both main effects and interactions in a model,
then Patsy goes from lower-order effects to higher-order effects,
adding in just enough columns to produce a well-defined model. The
result is that each set of columns measures the <em>additional</em>
contribution of this effect &#8211; just what you want for a traditional
ANOVA:</p>
<div class="highlight-ipython"><div class="highlight"><pre><span class="gp">In [29]: </span><span class="n">dmatrix</span><span class="p">(</span><span class="s">&quot;a + b + a:b&quot;</span><span class="p">,</span> <span class="n">data</span><span class="p">)</span>
<span class="gh">Out[29]: </span><span class="go"></span>
<span class="go">DesignMatrix with shape (8, 4)</span>
<span class="go">  Intercept  a[T.a2]  b[T.b2]  a[T.a2]:b[T.b2]</span>
<span class="go">          1        0        0                0</span>
<span class="go">          1        0        1                0</span>
<span class="go">          1        1        0                0</span>
<span class="go">          1        1        1                1</span>
<span class="go">          1        0        0                0</span>
<span class="go">          1        0        1                0</span>
<span class="go">          1        1        0                0</span>
<span class="go">          1        1        1                1</span>
<span class="go">  Terms:</span>
<span class="go">    &#39;Intercept&#39; (column 0)</span>
<span class="go">    &#39;a&#39; (column 1)</span>
<span class="go">    &#39;b&#39; (column 2)</span>
<span class="go">    &#39;a:b&#39; (column 3)</span>
</pre></div>
</div>
<p>Since this is so common, there&#8217;s a convenient short-hand:</p>
<div class="highlight-ipython"><div class="highlight"><pre><span class="gp">In [30]: </span><span class="n">dmatrix</span><span class="p">(</span><span class="s">&quot;a*b&quot;</span><span class="p">,</span> <span class="n">data</span><span class="p">)</span>
<span class="gh">Out[30]: </span><span class="go"></span>
<span class="go">DesignMatrix with shape (8, 4)</span>
<span class="go">  Intercept  a[T.a2]  b[T.b2]  a[T.a2]:b[T.b2]</span>
<span class="go">          1        0        0                0</span>
<span class="go">          1        0        1                0</span>
<span class="go">          1        1        0                0</span>
<span class="go">          1        1        1                1</span>
<span class="go">          1        0        0                0</span>
<span class="go">          1        0        1                0</span>
<span class="go">          1        1        0                0</span>
<span class="go">          1        1        1                1</span>
<span class="go">  Terms:</span>
<span class="go">    &#39;Intercept&#39; (column 0)</span>
<span class="go">    &#39;a&#39; (column 1)</span>
<span class="go">    &#39;b&#39; (column 2)</span>
<span class="go">    &#39;a:b&#39; (column 3)</span>
</pre></div>
</div>
<p>Of course you can use <a class="reference internal" href="API-reference.html#categorical-coding-ref"><span>other coding schemes</span></a> too (or even <a class="reference internal" href="categorical-coding.html#categorical-coding"><span>define your own</span></a>). Here&#8217;s <a class="reference internal" href="API-reference.html#patsy.Poly" title="patsy.Poly"><code class="xref py py-class docutils literal"><span class="pre">orthogonal</span> <span class="pre">polynomial</span> <span class="pre">coding</span></code></a>:</p>
<div class="highlight-ipython"><div class="highlight"><pre><span class="gp">In [31]: </span><span class="n">dmatrix</span><span class="p">(</span><span class="s">&quot;C(c, Poly)&quot;</span><span class="p">,</span> <span class="p">{</span><span class="s">&quot;c&quot;</span><span class="p">:</span> <span class="p">[</span><span class="s">&quot;c1&quot;</span><span class="p">,</span> <span class="s">&quot;c1&quot;</span><span class="p">,</span> <span class="s">&quot;c2&quot;</span><span class="p">,</span> <span class="s">&quot;c2&quot;</span><span class="p">,</span> <span class="s">&quot;c3&quot;</span><span class="p">,</span> <span class="s">&quot;c3&quot;</span><span class="p">]})</span>
<span class="gh">Out[31]: </span><span class="go"></span>
<span class="go">DesignMatrix with shape (6, 3)</span>
<span class="go">  Intercept  C(c, Poly).Linear  C(c, Poly).Quadratic</span>
<span class="go">          1           -0.70711               0.40825</span>
<span class="go">          1           -0.70711               0.40825</span>
<span class="go">          1           -0.00000              -0.81650</span>
<span class="go">          1           -0.00000              -0.81650</span>
<span class="go">          1            0.70711               0.40825</span>
<span class="go">          1            0.70711               0.40825</span>
<span class="go">  Terms:</span>
<span class="go">    &#39;Intercept&#39; (column 0)</span>
<span class="go">    &#39;C(c, Poly)&#39; (columns 1:3)</span>
</pre></div>
</div>
<p>You can even write interactions between categorical and numerical
variables. Here we fit two different slope coefficients for <code class="docutils literal"><span class="pre">x1</span></code>:
one for the <code class="docutils literal"><span class="pre">a1</span></code> group, and one for the <code class="docutils literal"><span class="pre">a2</span></code> group:</p>
<div class="highlight-ipython"><div class="highlight"><pre><span class="gp">In [32]: </span><span class="n">dmatrix</span><span class="p">(</span><span class="s">&quot;a:x1&quot;</span><span class="p">,</span> <span class="n">data</span><span class="p">)</span>
<span class="gh">Out[32]: </span><span class="go"></span>
<span class="go">DesignMatrix with shape (8, 3)</span>
<span class="go">  Intercept  a[a1]:x1  a[a2]:x1</span>
<span class="go">          1   1.76405   0.00000</span>
<span class="go">          1   0.40016   0.00000</span>
<span class="go">          1   0.00000   0.97874</span>
<span class="go">          1   0.00000   2.24089</span>
<span class="go">          1   1.86756   0.00000</span>
<span class="go">          1  -0.97728  -0.00000</span>
<span class="go">          1   0.00000   0.95009</span>
<span class="go">          1  -0.00000  -0.15136</span>
<span class="go">  Terms:</span>
<span class="go">    &#39;Intercept&#39; (column 0)</span>
<span class="go">    &#39;a:x1&#39; (columns 1:3)</span>
</pre></div>
</div>
<p>The same redundancy avoidance code works here, so if you&#8217;d rather have
treatment-coded slopes (one slope for the <code class="docutils literal"><span class="pre">a1</span></code> group, and a second
for the difference between the <code class="docutils literal"><span class="pre">a1</span></code> and <code class="docutils literal"><span class="pre">a2</span></code> group slopes), then
you can request it like this:</p>
<div class="highlight-ipython"><div class="highlight"><pre><span class="go"># compare to the difference between &quot;0 + a&quot; and &quot;1 + a&quot;</span>
<span class="gp">In [33]: </span><span class="n">dmatrix</span><span class="p">(</span><span class="s">&quot;x1 + a:x1&quot;</span><span class="p">,</span> <span class="n">data</span><span class="p">)</span>
<span class="gh">Out[33]: </span><span class="go"></span>
<span class="go">DesignMatrix with shape (8, 3)</span>
<span class="go">  Intercept        x1  a[T.a2]:x1</span>
<span class="go">          1   1.76405     0.00000</span>
<span class="go">          1   0.40016     0.00000</span>
<span class="go">          1   0.97874     0.97874</span>
<span class="go">          1   2.24089     2.24089</span>
<span class="go">          1   1.86756     0.00000</span>
<span class="go">          1  -0.97728    -0.00000</span>
<span class="go">          1   0.95009     0.95009</span>
<span class="go">          1  -0.15136    -0.15136</span>
<span class="go">  Terms:</span>
<span class="go">    &#39;Intercept&#39; (column 0)</span>
<span class="go">    &#39;x1&#39; (column 1)</span>
<span class="go">    &#39;a:x1&#39; (column 2)</span>
</pre></div>
</div>
<p>And more complex expressions work too:</p>
<div class="highlight-ipython"><div class="highlight"><pre><span class="gp">In [34]: </span><span class="n">dmatrix</span><span class="p">(</span><span class="s">&quot;C(a, Poly):center(x1)&quot;</span><span class="p">,</span> <span class="n">data</span><span class="p">)</span>
<span class="gh">Out[34]: </span><span class="go"></span>
<span class="go">DesignMatrix with shape (8, 3)</span>
<span class="go">  Intercept  C(a, Poly).Constant:center(x1)  C(a, Poly).Linear:center(x1)</span>
<span class="go">          1                         0.87995                      -0.62222</span>
<span class="go">          1                        -0.48395                       0.34220</span>
<span class="go">          1                         0.09463                       0.06691</span>
<span class="go">          1                         1.35679                       0.95939</span>
<span class="go">          1                         0.98345                      -0.69541</span>
<span class="go">          1                        -1.86138                       1.31620</span>
<span class="go">          1                         0.06598                       0.04666</span>
<span class="go">          1                        -1.03546                      -0.73218</span>
<span class="go">  Terms:</span>
<span class="go">    &#39;Intercept&#39; (column 0)</span>
<span class="go">    &#39;C(a, Poly):center(x1)&#39; (columns 1:3)</span>
</pre></div>
</div>
</div>


          </div>
        </div>
      </div>
      <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
        <div class="sphinxsidebarwrapper">
  <!-- Sequential navigation: links to the previous and next chapters. -->
  <h4>Previous topic</h4>
  <p class="topless"><a href="overview.html" title="previous chapter">Overview</a></p>
  <h4>Next topic</h4>
  <p class="topless"><a href="formulas.html" title="next chapter">How formulas work</a></p>
  <!-- "This Page" menu: a single link to the page's source text under _sources/. -->
  <div role="note" aria-label="source link">
    <h3>This Page</h3>
    <ul class="this-page-menu">
      <li><a href="_sources/quickstart.txt" rel="nofollow">Show Source</a></li>
    </ul>
  </div>
<!--
  Quick-search box. It starts hidden (display: none) and is revealed by the
  script that follows it, so it only appears when scripting and the `$`
  helper that script calls are available.
-->
<div id="searchbox" style="display: none" role="search">
  <!-- The heading doubles as the accessible name of the query field below. -->
  <h3 id="searchlabel">Quick search</h3>
    <form class="search" action="search.html" method="get">
      <!-- aria-labelledby gives the otherwise unlabelled text field a name for assistive technology. -->
      <input type="text" name="q" aria-labelledby="searchlabel" />
      <input type="submit" value="Go" />
      <input type="hidden" name="check_keywords" value="yes" />
      <input type="hidden" name="area" value="default" />
    </form>
    <p class="searchtip" style="font-size: 90%">
    Enter search terms or a module, class or function name.
    </p>
</div>
<script type="text/javascript">$('#searchbox').show(0);</script>
        </div>
      </div>
      <div class="clearer"></div>
    </div>
    <!-- Related-links bar after the main content: index, module index, next/previous page, and a link back to index.html. -->
    <div class="related" role="navigation" aria-label="related navigation">
      <h3>Navigation</h3>
      <ul>
        <li class="right" style="margin-right: 10px">
          <a href="genindex.html" title="General Index">index</a></li>
        <li class="right">
          <a href="py-modindex.html" title="Python Module Index">modules</a> |</li>
        <li class="right">
          <a href="formulas.html" title="How formulas work">next</a> |</li>
        <li class="right">
          <a href="overview.html" title="Overview">previous</a> |</li>
        <li class="nav-item nav-item-0"><a href="index.html">patsy 0.4.1 documentation</a> &raquo;</li>
      </ul>
    </div>
    <!-- Page footer: copyright notice and generator credit (generator link uses HTTPS). -->
    <div class="footer" role="contentinfo">
        &copy; Copyright 2011-2015, Nathaniel J. Smith.
      Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 1.3.1.
    </div>
  </body>
</html>