Skip to content

sgnligo.transforms.whiten

An element to whiten time-series data and caclulate the power spectral density.

Whiten dataclass

Bases: TSTransform

Whiten input timeseries data.

Parameters:

Name Type Description Default
instrument Optional[str]

str, instrument to process. Used if reference-psd is given

None
whitening-method

str, currently supported types: (1) 'gwpy', (2) 'gstlal'

required
sample-rate

int, sample rate of the data

required
fft-length

int, length of fft in seconds used for whitening

required
nmed int

int, how many previous samples we should account for when calcualting the geometric mean of the psd

7
navg int

int, changes to the PSD must occur over a time scale of at least navg(n/2 - z)(1/sample_rate) *check cody's paper for more info

64
reference_psd Optional[str]

file, path to reference psd xml

None
psd_pad_name str

str, pad name of the psd output source pad

''
Source code in sgnligo/transforms/whiten.py
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
@dataclass
class Whiten(TSTransform):
    """Whiten input timeseries data.

    Args:
        instrument:
            str, instrument to process. Used if reference-psd is given
        whitening-method:
            str, currently supported types: (1) 'gwpy', (2) 'gstlal'
        sample-rate:
            int, sample rate of the data
        fft-length:
            int, length of fft in seconds used for whitening
        nmed:
            int, how many previous samples we should account for when calcualting the
            geometric mean of the psd
        navg:
            int, changes to the PSD must occur over a time scale of at least
            navg*(n/2 - z)*(1/sample_rate) *check cody's paper for more info
        reference_psd:
            file, path to reference psd xml
        psd_pad_name:
            str, pad name of the psd output source pad
    """

    instrument: Optional[str] = None
    psd_pad_name: str = ""
    whiten_pad_name: str = ""
    whitening_method: str = "gstlal"
    input_sample_rate: int = 16384
    whiten_sample_rate: int = 2048
    fft_length: int = 8
    nmed: int = 7
    navg: int = 64
    reference_psd: Optional[str] = None

    def __post_init__(self):
        assert len(self.sink_pad_names) == 1, "Only supports one sink pad"
        assert (
            len(self.source_pad_names) == 0
        ), "source_pad_names are derived from whiten_pad_name and psd_pad_name"
        assert self.whiten_pad_name and self.psd_pad_name
        self.source_pad_names = (self.whiten_pad_name, self.psd_pad_name)

        # define block overlap following arxiv:1604.04324
        self.n_input = int(self.fft_length * self.input_sample_rate)
        self.z_input = int(self.fft_length / 4 * self.input_sample_rate)
        self.hann_length_input = self.n_input - 2 * self.z_input
        overlap_input = self.hann_length_input // 2

        self.n_whiten = int(self.fft_length * self.whiten_sample_rate)
        self.z_whiten = int(self.fft_length / 4 * self.whiten_sample_rate)
        self.hann_length_whiten = self.n_whiten - 2 * self.z_whiten

        # init audio addapter
        self.adapter_config = AdapterConfig()
        self.adapter_config.overlap = (
            0,
            Offset.fromsamples(overlap_input, self.input_sample_rate),
        )
        self.adapter_config.stride = Offset.fromsamples(
            self.hann_length_input // 2, self.input_sample_rate
        )
        self.adapter_config.skip_gaps = True
        self.stride_samples = Offset.tosamples(
            self.adapter_config.stride, self.whiten_sample_rate
        )

        super().__post_init__()

        self.latest_psd = None
        self.output_frames = {p: None for p in self.source_pads}

        # set up for gstlal method:
        if self.whitening_method == "gstlal":
            # the offset of the first output buffer
            self.first_output_offset = None

            # keep track of number of instantaneous PSDs
            # we have calculated up to navg
            self.n_samples = 0

            # set requested sampling rates
            self.delta_f_whiten = 1 / (1 / self.whiten_sample_rate) / self.n_whiten
            self.delta_t_whiten = 1 / self.whiten_sample_rate
            self.lal_normalization_constant_whiten = 2 * self.delta_f_whiten

            # store last nmed instantaneous PSDs
            self.square_data_bufs = deque(maxlen=self.nmed)
            self.prev_data = None

            # initialize window functions
            # we apply a hann window to incoming raw data
            self.hann_input, self.hann_norm_input = self.hann_window(
                self.n_input, self.z_input
            )
            self.hann_whiten, self.hann_norm_whiten = self.hann_window(
                self.n_whiten, self.z_whiten
            )

            # we apply a tukey window on whitened data if we have zero-padding
            if self.z_whiten:
                self.tukey = self.tukey_window(
                    self.n_whiten, 2 * self.z_whiten / self.n_whiten
                )
            else:
                self.tukey = None

            # load reference PSD if provided
            if self.reference_psd:
                psd = lal.series.read_psd_xmldoc(
                    ligolw_utils.load_filename(
                        self.reference_psd,
                        verbose=True,
                        contenthandler=lal.series.PSDContentHandler,
                    )
                )
                psd = psd[self.instrument]

                # gstlal.condition
                # def psd_units_or_resolution_changed(elem, pspec, psd):
                # make sure units are set, compute scale factor
                # FIXME: what is this units?
                # units = lal.Unit(elem.get_property("psd-units"))
                # if units == lal.DimensionlessUnit:
                #    return
                # scale = float(psd.sampleUnits / units)
                scale = 1

                # get frequency resolution and number of bins
                fnyquist = self.whiten_sample_rate / 2
                n = int(round(fnyquist / self.delta_f_whiten) + 1)

                # interpolate and rescale PSD
                psd = interpolate_psd(psd, self.delta_f_whiten)
                ref_psd_data = psd.data.data[:n] * scale

                # install PSD in buffer history
                self.set_psd(ref_psd_data, self.navg)

    def tukey_window(self, length, beta):
        """
        XLALCreateTukeyREAL8Window
        https://lscsoft.docs.ligo.org/lalsuite/lal/_window_8c_source.html#l00597

        1.0 and flat in the middle, cos^2 transition at each end, zero
        at end points, 0.0 <= beta <= 1.0 sets what fraction of the
        window is transition (0 --> rectangle window, 1 --> Hann window)
        """
        if beta < 0 or beta > 1:
            raise ValueError("Invalid value for beta")

        transition_length = round(beta * length)

        out = np.ones(length)
        for i in range((transition_length + 1) // 2):
            o = np.cos(np.pi / 2 * Y(transition_length, i)) ** 2
            out[i] = o
            out[length - 1 - i] = o

        return out

    def hann_window(self, N, Z):
        """
        Define hann window
        Parameters:
        -----------
        N: int
            Number of samples in one window block
        Z: int
            Number of samples to zero pad
        """
        # array of indices
        k = np.arange(0, N, 1)

        hann = np.zeros(N)
        hann[Z : N - Z] = (np.sin(np.pi * (k[Z : N - Z] - Z) / (N - 2 * Z))) ** 2

        # FIXME gstlal had a method of adding from the two ends of the window
        # so that small numbers weren't added to big ones
        hann_norm = np.sqrt(N / np.sum(hann**2))

        return hann, hann_norm

    def median_bias(self, nn):
        """
        XLALMedianBias
        https://lscsoft.docs.ligo.org/lalsuite/lal/_average_spectrum_8c_source.html#l00378
        """
        ans = 1
        n = (nn - 1) // 2
        for i in range(1, n + 1):
            ans -= 1.0 / (2 * i)
            ans += 1.0 / (2 * i + 1)

        return ans

    def log_median_bias_geometric(self, nn):
        """
        XLALLogMedianBiasGeometric
        https://lscsoft.docs.ligo.org/lalsuite/lal/_average_spectrum_8c_source.html#l01423
        """
        return np.log(self.median_bias(nn)) - nn * (loggamma(1 / nn) - np.log(nn))

    def add_psd(self, fdata):
        """
        XLALPSDRegressorAdd
        https://lscsoft.docs.ligo.org/lalsuite/lal/_average_spectrum_8c_source.html#l01632
        """
        self.square_data_bufs.append(np.abs(fdata) ** 2)

        if self.n_samples == 0:
            self.geometric_mean_square = np.log(self.square_data_bufs[0])
            self.n_samples += 1
        else:
            self.n_samples += 1
            self.n_samples = min(self.n_samples, self.navg)
            median_bias = self.log_median_bias_geometric(len(self.square_data_bufs))

            # FIXME: this is how XLALPSDRegressorAdd gets the median,
            # but this is not exactly the median when the number is even.
            # numpy takes the average of the middle two, while this gets
            # the larger one
            log_bin_median = np.log(
                np.sort(self.square_data_bufs, axis=0)[len(self.square_data_bufs) // 2]
            )
            self.geometric_mean_square = (
                self.geometric_mean_square * (self.n_samples - 1)
                + log_bin_median
                - median_bias
            ) / self.n_samples

    def get_psd(self, fdata):
        """
        XLALPSDRegressorGetPSD
        https://lscsoft.docs.ligo.org/lalsuite/lal/_average_spectrum_8c_source.html#l01773
        """
        # running average mode (track-psd)
        if self.n_samples == 0:
            out = self.lal_normalization_constant_whiten * (np.abs(fdata) ** 2)

            # set DC and Nyquist terms to zero
            # FIXME: gstlal had a condition if self.f0 == 0
            out[0] = 0
            out[self.n_whiten // 2] = 0
            return out
        else:
            return (
                np.exp(self.geometric_mean_square + EULERGAMMA)
                * self.lal_normalization_constant_whiten
            )

    def set_psd(self, ref_psd_data, weight):
        """
        XLALPSDRegressorSetPSD
        https://lscsoft.docs.ligo.org/lalsuite/lal/_average_spectrum_8c_source.html#l01831
        """
        arithmetic_mean_square_data = (
            ref_psd_data / self.lal_normalization_constant_whiten
        )

        # populate the buffer history with the ref psd
        for _i in range(self.nmed):
            self.square_data_bufs.append(arithmetic_mean_square_data)

        self.geometric_mean_square = np.log(arithmetic_mean_square_data) - EULERGAMMA
        self.n_samples = min(weight, self.navg)

    def internal(self):
        """
        Whiten incoming data in segments of fft-length seconds overlapped by fft-length
        * 3/4. If the data segment has N samples, we apply a zero-padded Hann window on
        the data with zero-padding of Z = N/4 samples. The Hann window length is then
        N - 2 * Z. The output stride is hann_length / 2.

        Example:
        --------
        fft_length = 4 sec
        sample_rate = 4
        N = 16
        Z = 4
        hann_length = 8
        output_stride = 4

        -- : input data
        .. : zero-padding
        ** : hann window
        [] : output buffer
        {} : output that will be added to the next iteration


                     *
                    * *
                   *   *
                  *     *
                 *
        1)   ....--------....
            -1s  0s      2s  3s
                 {add to next}


                         *
                        * *
                       *   *
                      *     *
                     *
        2)       ....--------....
                0s   1s      3s  4s
                [out]{add to next}


                             *
                            * *
                           *   *
                          *     *
                         *
        3)           ....--------....
                    1s   2s      4s  5s
                    [out]{add to next}


        Each fft-length of data will be windowed by the zero-padded Hann window, then
        FFTed to obtain the instantaneous PSD. The instantaneous PSD will be saved to
        a queue to calculate the running geometric mean of median PSDs, see
        arxiv:1604.04324. The running geometric mean of median PSDs from the last
        iteration will be used to whiten the current windowed-fft-length of data.
        The overlap segment from the previous output will be added to current whitened
        data. Finally the first output-stride samples of the whitened data will be put
        into the output buffer.

        Note that we will only start to produce output when the output offset is equal
        to or after the first input buffer, so the first iteration is a gap buffer.

        """
        super().internal()
        # incoming frame handling
        frame = self.preparedframes[self.sink_pads[0]]
        EOS = frame.EOS
        metadata = frame.metadata
        outoffsets = self.preparedoutoffsets[self.sink_pads[0]]

        if self.first_output_offset is None:
            self.first_output_offset = frame.offset

        padded_data_offset = outoffsets[0]["offset"] - Offset.fromsamples(
            self.z_whiten, self.whiten_sample_rate
        )

        # FIXME: haven't tested whether this works for gwpy method
        # FIXME: can we make this more general?
        if padded_data_offset < self.first_output_offset:
            # we are in the startup stage, don't output yet
            outoffset = outoffsets[0]["offset"]
            shape = (0,)
        else:
            outoffset = padded_data_offset
            shape = (
                Offset.tosamples(outoffsets[0]["noffset"], self.whiten_sample_rate),
            )

        # the epoch of the psd is the mid point of the most recent fft
        # which corresponds to the end offset of the output + half hann
        # length
        # FIXME: double check

        psd_epoch = int(
            Offset.tons(outoffset + outoffsets[0]["noffset"])
            + self.hann_length_whiten // 2 / self.whiten_sample_rate * 1e9
        )

        # if audioadapter hasn't given us a frame, then we have to wait for more
        # data before we can whiten. send a gap buffer
        if frame.is_gap:
            if (
                outoffsets[0]["noffset"] != 0
                and self.prev_data is not None
                and self.prev_data.shape[-1] > 0
            ):
                # drain the output history
                output_whitened_data = self.prev_data[: self.stride_samples]
                self.prev_data = self.prev_data[self.stride_samples :]
            else:
                output_whitened_data = None
        else:
            # retrieve samples from the deque
            assert len(frame.buffers) == 1, "Multiple buffers not implemented yet."
            buf = frame.buffers[0]
            this_seg_data = buf.data

            if self.whitening_method == "gwpy":
                # check the type of the timeseries data.
                # transform it to a gwpy.timeseries object
                if not isinstance(this_seg_data, TimeSeries):
                    this_seg_data = TimeSeries(this_seg_data)

                # whiten it
                whitened_data = this_seg_data.whiten(
                    fftlength=self.fft_length, overlap=0, window="hann"
                )

                # transform back to a numpy array
                whitened_data = np.array(whitened_data)

            elif self.whitening_method == "gstlal":
                # apply the window function
                this_seg_data = (
                    self.hann_input[self.z_input : -self.z_input]
                    * this_seg_data
                    * 1
                    / self.input_sample_rate
                    * self.hann_norm_input
                )
                this_seg_data = np.pad(this_seg_data, (self.z_input, self.z_input))

                # apply fourier transform
                freq_data = np.fft.rfft(this_seg_data)

                # get frequency bins
                freqs = np.fft.rfftfreq(
                    this_seg_data.size, d=1 / self.input_sample_rate
                )

                # downsampling
                freq_data = freq_data[
                    : int(
                        self.whiten_sample_rate
                        / self.input_sample_rate
                        * freq_data.shape[-1]
                    )
                    + 1
                ]

                # get the latest PSD
                this_psd = self.get_psd(freq_data)

                # store the latest spectrum so we can output on spectrum pad
                f0 = freqs[0]
                self.latest_psd = lal.CreateREAL8FrequencySeries(
                    "new_psd",
                    psd_epoch / 1e9,
                    f0,
                    self.delta_f_whiten,
                    "s strain^2",
                    len(this_psd),
                )
                self.latest_psd.data.data = this_psd

                # push freq data into psd history
                self.add_psd(freq_data)

                # Whitening
                # the DC and Nyquist terms are zero
                freq_data_whitened = np.zeros_like(freq_data)
                freq_data_whitened[1:-1] = freq_data[1:-1] * np.sqrt(
                    self.lal_normalization_constant_whiten / this_psd[1:-1]
                )

                # Fourier Transform back to the time domain
                # # see arxiv: 1604.04324 (13)
                # self.delta_f scaling https://lscsoft.docs.ligo.org/lalsuite/lal/
                # _time_freq_f_f_t_8c_source.html#l00183
                whitened_data = (
                    np.fft.irfft(freq_data_whitened, self.n_whiten, norm="forward")
                    * self.delta_f_whiten
                )
                whitened_data *= self.delta_t_whiten * np.sqrt(
                    np.sum(self.hann_whiten**2)
                )

                if self.tukey is not None:
                    whitened_data *= self.tukey

                # accounts for overlap by summing with prev_data over the
                # stride of the adapter
                if self.prev_data is not None:
                    whitened_data[: self.prev_data.shape[-1]] += self.prev_data
                self.prev_data = whitened_data[self.stride_samples :]

            # FIXME: haven't tested whether this works for gwpy method
            # FIXME: can we make this more general?
            # only output data up till the length of the adapter stride
            if padded_data_offset < self.first_output_offset:
                # output a gap buffer during the startup period of the whitening
                # calculation
                output_whitened_data = None
            else:
                output_whitened_data = whitened_data[: self.stride_samples]

        # passes the spectrum in metadata if the pad is the psd_pad
        # FIXME: use EventBuffers
        if output_whitened_data is None:
            metadata["psd"] = None
            metadata["navg"] = None
            metadata["n_samples"] = None
        else:
            metadata["psd"] = self.latest_psd
            metadata["navg"] = self.navg
            metadata["n_samples"] = self.n_samples

        metadata["epoch"] = psd_epoch

        self.output_frames[self.srcs[self.psd_pad_name]] = TSFrame(
            buffers=[
                SeriesBuffer(
                    offset=outoffset,
                    sample_rate=self.whiten_sample_rate,
                    data=None,
                    shape=shape,
                )
            ],
            EOS=EOS,
            metadata=metadata,
        )

        self.output_frames[self.srcs[self.whiten_pad_name]] = TSFrame(
            buffers=[
                SeriesBuffer(
                    offset=outoffset,
                    sample_rate=self.whiten_sample_rate,
                    data=output_whitened_data,
                    shape=shape,
                )
            ],
            EOS=EOS,
            metadata=metadata,
        )

    def new(self, pad):
        return self.output_frames[pad]

add_psd(fdata)

XLALPSDRegressorAdd https://lscsoft.docs.ligo.org/lalsuite/lal/_average_spectrum_8c_source.html#l01632

Source code in sgnligo/transforms/whiten.py
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
def add_psd(self, fdata):
    """
    XLALPSDRegressorAdd
    https://lscsoft.docs.ligo.org/lalsuite/lal/_average_spectrum_8c_source.html#l01632
    """
    self.square_data_bufs.append(np.abs(fdata) ** 2)

    if self.n_samples == 0:
        self.geometric_mean_square = np.log(self.square_data_bufs[0])
        self.n_samples += 1
    else:
        self.n_samples += 1
        self.n_samples = min(self.n_samples, self.navg)
        median_bias = self.log_median_bias_geometric(len(self.square_data_bufs))

        # FIXME: this is how XLALPSDRegressorAdd gets the median,
        # but this is not exactly the median when the number is even.
        # numpy takes the average of the middle two, while this gets
        # the larger one
        log_bin_median = np.log(
            np.sort(self.square_data_bufs, axis=0)[len(self.square_data_bufs) // 2]
        )
        self.geometric_mean_square = (
            self.geometric_mean_square * (self.n_samples - 1)
            + log_bin_median
            - median_bias
        ) / self.n_samples

get_psd(fdata)

XLALPSDRegressorGetPSD https://lscsoft.docs.ligo.org/lalsuite/lal/_average_spectrum_8c_source.html#l01773

Source code in sgnligo/transforms/whiten.py
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
def get_psd(self, fdata):
    """
    XLALPSDRegressorGetPSD
    https://lscsoft.docs.ligo.org/lalsuite/lal/_average_spectrum_8c_source.html#l01773
    """
    # running average mode (track-psd)
    if self.n_samples == 0:
        out = self.lal_normalization_constant_whiten * (np.abs(fdata) ** 2)

        # set DC and Nyquist terms to zero
        # FIXME: gstlal had a condition if self.f0 == 0
        out[0] = 0
        out[self.n_whiten // 2] = 0
        return out
    else:
        return (
            np.exp(self.geometric_mean_square + EULERGAMMA)
            * self.lal_normalization_constant_whiten
        )

hann_window(N, Z)

Define hann window Parameters:


N: int Number of samples in one window block Z: int Number of samples to zero pad

Source code in sgnligo/transforms/whiten.py
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
def hann_window(self, N, Z):
    """
    Define hann window
    Parameters:
    -----------
    N: int
        Number of samples in one window block
    Z: int
        Number of samples to zero pad
    """
    # array of indices
    k = np.arange(0, N, 1)

    hann = np.zeros(N)
    hann[Z : N - Z] = (np.sin(np.pi * (k[Z : N - Z] - Z) / (N - 2 * Z))) ** 2

    # FIXME gstlal had a method of adding from the two ends of the window
    # so that small numbers weren't added to big ones
    hann_norm = np.sqrt(N / np.sum(hann**2))

    return hann, hann_norm

internal()

Whiten incoming data in segments of fft-length seconds overlapped by fft-length * 3/4. If the data segment has N samples, we apply a zero-padded Hann window on the data with zero-padding of Z = N/4 samples. The Hann window length is then N - 2 * Z. The output stride is hann_length / 2.

Example:

fft_length = 4 sec sample_rate = 4 N = 16 Z = 4 hann_length = 8 output_stride = 4

-- : input data .. : zero-padding ** : hann window [] : output buffer {} : output that will be added to the next iteration

         *
        * *
       *   *
      *     *
     *

1) ....--------.... -1s 0s 2s 3s {add to next}

             *
            * *
           *   *
          *     *
         *

2) ....--------.... 0s 1s 3s 4s [out]{add to next}

                 *
                * *
               *   *
              *     *
             *

3) ....--------.... 1s 2s 4s 5s [out]{add to next}

Each fft-length of data will be windowed by the zero-padded Hann window, then FFTed to obtain the instantaneous PSD. The instantaneous PSD will be saved to a queue to calculate the running geometric mean of median PSDs, see arxiv:1604.04324. The running geometric mean of median PSDs from the last iteration will be used to whiten the current windowed-fft-length of data. The overlap segment from the previous output will be added to current whitened data. Finally the first output-stride samples of the whitened data will be put into the output buffer.

Note that we will only start to produce output when the output offset is equal to or after the first input buffer, so the first iteration is a gap buffer.

Source code in sgnligo/transforms/whiten.py
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
def internal(self):
    """
    Whiten incoming data in segments of fft-length seconds overlapped by fft-length
    * 3/4. If the data segment has N samples, we apply a zero-padded Hann window on
    the data with zero-padding of Z = N/4 samples. The Hann window length is then
    N - 2 * Z. The output stride is hann_length / 2.

    Example:
    --------
    fft_length = 4 sec
    sample_rate = 4
    N = 16
    Z = 4
    hann_length = 8
    output_stride = 4

    -- : input data
    .. : zero-padding
    ** : hann window
    [] : output buffer
    {} : output that will be added to the next iteration


                 *
                * *
               *   *
              *     *
             *
    1)   ....--------....
        -1s  0s      2s  3s
             {add to next}


                     *
                    * *
                   *   *
                  *     *
                 *
    2)       ....--------....
            0s   1s      3s  4s
            [out]{add to next}


                         *
                        * *
                       *   *
                      *     *
                     *
    3)           ....--------....
                1s   2s      4s  5s
                [out]{add to next}


    Each fft-length of data will be windowed by the zero-padded Hann window, then
    FFTed to obtain the instantaneous PSD. The instantaneous PSD will be saved to
    a queue to calculate the running geometric mean of median PSDs, see
    arxiv:1604.04324. The running geometric mean of median PSDs from the last
    iteration will be used to whiten the current windowed-fft-length of data.
    The overlap segment from the previous output will be added to current whitened
    data. Finally the first output-stride samples of the whitened data will be put
    into the output buffer.

    Note that we will only start to produce output when the output offset is equal
    to or after the first input buffer, so the first iteration is a gap buffer.

    """
    super().internal()
    # incoming frame handling
    frame = self.preparedframes[self.sink_pads[0]]
    EOS = frame.EOS
    metadata = frame.metadata
    outoffsets = self.preparedoutoffsets[self.sink_pads[0]]

    if self.first_output_offset is None:
        self.first_output_offset = frame.offset

    padded_data_offset = outoffsets[0]["offset"] - Offset.fromsamples(
        self.z_whiten, self.whiten_sample_rate
    )

    # FIXME: haven't tested whether this works for gwpy method
    # FIXME: can we make this more general?
    if padded_data_offset < self.first_output_offset:
        # we are in the startup stage, don't output yet
        outoffset = outoffsets[0]["offset"]
        shape = (0,)
    else:
        outoffset = padded_data_offset
        shape = (
            Offset.tosamples(outoffsets[0]["noffset"], self.whiten_sample_rate),
        )

    # the epoch of the psd is the mid point of the most recent fft
    # which corresponds to the end offset of the output + half hann
    # length
    # FIXME: double check

    psd_epoch = int(
        Offset.tons(outoffset + outoffsets[0]["noffset"])
        + self.hann_length_whiten // 2 / self.whiten_sample_rate * 1e9
    )

    # if audioadapter hasn't given us a frame, then we have to wait for more
    # data before we can whiten. send a gap buffer
    if frame.is_gap:
        if (
            outoffsets[0]["noffset"] != 0
            and self.prev_data is not None
            and self.prev_data.shape[-1] > 0
        ):
            # drain the output history
            output_whitened_data = self.prev_data[: self.stride_samples]
            self.prev_data = self.prev_data[self.stride_samples :]
        else:
            output_whitened_data = None
    else:
        # retrieve samples from the deque
        assert len(frame.buffers) == 1, "Multiple buffers not implemented yet."
        buf = frame.buffers[0]
        this_seg_data = buf.data

        if self.whitening_method == "gwpy":
            # check the type of the timeseries data.
            # transform it to a gwpy.timeseries object
            if not isinstance(this_seg_data, TimeSeries):
                this_seg_data = TimeSeries(this_seg_data)

            # whiten it
            whitened_data = this_seg_data.whiten(
                fftlength=self.fft_length, overlap=0, window="hann"
            )

            # transform back to a numpy array
            whitened_data = np.array(whitened_data)

        elif self.whitening_method == "gstlal":
            # apply the window function
            this_seg_data = (
                self.hann_input[self.z_input : -self.z_input]
                * this_seg_data
                * 1
                / self.input_sample_rate
                * self.hann_norm_input
            )
            this_seg_data = np.pad(this_seg_data, (self.z_input, self.z_input))

            # apply fourier transform
            freq_data = np.fft.rfft(this_seg_data)

            # get frequency bins
            freqs = np.fft.rfftfreq(
                this_seg_data.size, d=1 / self.input_sample_rate
            )

            # downsampling
            freq_data = freq_data[
                : int(
                    self.whiten_sample_rate
                    / self.input_sample_rate
                    * freq_data.shape[-1]
                )
                + 1
            ]

            # get the latest PSD
            this_psd = self.get_psd(freq_data)

            # store the latest spectrum so we can output on spectrum pad
            f0 = freqs[0]
            self.latest_psd = lal.CreateREAL8FrequencySeries(
                "new_psd",
                psd_epoch / 1e9,
                f0,
                self.delta_f_whiten,
                "s strain^2",
                len(this_psd),
            )
            self.latest_psd.data.data = this_psd

            # push freq data into psd history
            self.add_psd(freq_data)

            # Whitening
            # the DC and Nyquist terms are zero
            freq_data_whitened = np.zeros_like(freq_data)
            freq_data_whitened[1:-1] = freq_data[1:-1] * np.sqrt(
                self.lal_normalization_constant_whiten / this_psd[1:-1]
            )

            # Fourier Transform back to the time domain
            # # see arxiv: 1604.04324 (13)
            # self.delta_f scaling https://lscsoft.docs.ligo.org/lalsuite/lal/
            # _time_freq_f_f_t_8c_source.html#l00183
            whitened_data = (
                np.fft.irfft(freq_data_whitened, self.n_whiten, norm="forward")
                * self.delta_f_whiten
            )
            whitened_data *= self.delta_t_whiten * np.sqrt(
                np.sum(self.hann_whiten**2)
            )

            if self.tukey is not None:
                whitened_data *= self.tukey

            # accounts for overlap by summing with prev_data over the
            # stride of the adapter
            if self.prev_data is not None:
                whitened_data[: self.prev_data.shape[-1]] += self.prev_data
            self.prev_data = whitened_data[self.stride_samples :]

        # FIXME: haven't tested whether this works for gwpy method
        # FIXME: can we make this more general?
        # only output data up till the length of the adapter stride
        if padded_data_offset < self.first_output_offset:
            # output a gap buffer during the startup period of the whitening
            # calculation
            output_whitened_data = None
        else:
            output_whitened_data = whitened_data[: self.stride_samples]

    # passes the spectrum in metadata if the pad is the psd_pad
    # FIXME: use EventBuffers
    if output_whitened_data is None:
        metadata["psd"] = None
        metadata["navg"] = None
        metadata["n_samples"] = None
    else:
        metadata["psd"] = self.latest_psd
        metadata["navg"] = self.navg
        metadata["n_samples"] = self.n_samples

    metadata["epoch"] = psd_epoch

    self.output_frames[self.srcs[self.psd_pad_name]] = TSFrame(
        buffers=[
            SeriesBuffer(
                offset=outoffset,
                sample_rate=self.whiten_sample_rate,
                data=None,
                shape=shape,
            )
        ],
        EOS=EOS,
        metadata=metadata,
    )

    self.output_frames[self.srcs[self.whiten_pad_name]] = TSFrame(
        buffers=[
            SeriesBuffer(
                offset=outoffset,
                sample_rate=self.whiten_sample_rate,
                data=output_whitened_data,
                shape=shape,
            )
        ],
        EOS=EOS,
        metadata=metadata,
    )

log_median_bias_geometric(nn)

XLALLogMedianBiasGeometric https://lscsoft.docs.ligo.org/lalsuite/lal/_average_spectrum_8c_source.html#l01423

Source code in sgnligo/transforms/whiten.py
298
299
300
301
302
303
def log_median_bias_geometric(self, nn):
    """
    XLALLogMedianBiasGeometric
    https://lscsoft.docs.ligo.org/lalsuite/lal/_average_spectrum_8c_source.html#l01423
    """
    return np.log(self.median_bias(nn)) - nn * (loggamma(1 / nn) - np.log(nn))

median_bias(nn)

XLALMedianBias https://lscsoft.docs.ligo.org/lalsuite/lal/_average_spectrum_8c_source.html#l00378

Source code in sgnligo/transforms/whiten.py
285
286
287
288
289
290
291
292
293
294
295
296
def median_bias(self, nn):
    """
    XLALMedianBias
    https://lscsoft.docs.ligo.org/lalsuite/lal/_average_spectrum_8c_source.html#l00378
    """
    ans = 1
    n = (nn - 1) // 2
    for i in range(1, n + 1):
        ans -= 1.0 / (2 * i)
        ans += 1.0 / (2 * i + 1)

    return ans

set_psd(ref_psd_data, weight)

XLALPSDRegressorSetPSD https://lscsoft.docs.ligo.org/lalsuite/lal/_average_spectrum_8c_source.html#l01831

Source code in sgnligo/transforms/whiten.py
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
def set_psd(self, ref_psd_data, weight):
    """
    XLALPSDRegressorSetPSD
    https://lscsoft.docs.ligo.org/lalsuite/lal/_average_spectrum_8c_source.html#l01831
    """
    arithmetic_mean_square_data = (
        ref_psd_data / self.lal_normalization_constant_whiten
    )

    # populate the buffer history with the ref psd
    for _i in range(self.nmed):
        self.square_data_bufs.append(arithmetic_mean_square_data)

    self.geometric_mean_square = np.log(arithmetic_mean_square_data) - EULERGAMMA
    self.n_samples = min(weight, self.navg)

tukey_window(length, beta)

XLALCreateTukeyREAL8Window https://lscsoft.docs.ligo.org/lalsuite/lal/_window_8c_source.html#l00597

1.0 and flat in the middle, cos^2 transition at each end, zero at end points, 0.0 <= beta <= 1.0 sets what fraction of the window is transition (0 --> rectangle window, 1 --> Hann window)

Source code in sgnligo/transforms/whiten.py
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
def tukey_window(self, length, beta):
    """
    XLALCreateTukeyREAL8Window
    https://lscsoft.docs.ligo.org/lalsuite/lal/_window_8c_source.html#l00597

    1.0 and flat in the middle, cos^2 transition at each end, zero
    at end points, 0.0 <= beta <= 1.0 sets what fraction of the
    window is transition (0 --> rectangle window, 1 --> Hann window)
    """
    if beta < 0 or beta > 1:
        raise ValueError("Invalid value for beta")

    transition_length = round(beta * length)

    out = np.ones(length)
    for i in range((transition_length + 1) // 2):
        o = np.cos(np.pi / 2 * Y(transition_length, i)) ** 2
        out[i] = o
        out[length - 1 - i] = o

    return out

Y(length, i)

https://lscsoft.docs.ligo.org/lalsuite/lal/_window_8c_source.html#l00109

Maps the length of a window and the offset within the window to the "y" co-ordinate of the LAL documentation.

Input: length > 0, 0 <= i < length

Output: length < 2 --> return 0.0 i == 0 --> return -1.0 i == (length - 1) / 2 --> return 0.0 i == length - 1 --> return +1.0

e.g., length = 5 (odd), then i == 2 --> return 0.0 if length = 6 (even), then i == 2.5 --> return 0.0

(in the latter case, obviously i can't be a non-integer, but that's the value it would have to be for this function to return 0.0)

Source code in sgnligo/transforms/whiten.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def Y(length, i):
    """
    https://lscsoft.docs.ligo.org/lalsuite/lal/_window_8c_source.html#l00109

    Maps the length of a window and the offset within the window to the "y"
    co-ordinate of the LAL documentation.

    Input:
    length > 0,
    0 <= i < length

    Output:
    length < 2 --> return 0.0
    i == 0 --> return -1.0
    i == (length - 1) / 2 --> return 0.0
    i == length - 1 --> return +1.0

    e.g., length = 5 (odd), then i == 2 --> return 0.0
    if length = 6 (even), then i == 2.5 --> return 0.0

    (in the latter case, obviously i can't be a non-integer, but that's the
    value it would have to be for this function to return 0.0)
    """
    length -= 1
    return (2 * i - length) / length if length > 0 else 0

interpolate_psd(psd, deltaF)

Interpolates a PSD to a target frequency resolution.

Parameters:

Name Type Description Default
psd REAL8FrequencySeries

lal.REAL8FrequencySeries, the PSD to interpolate

required
deltaF int

int, the target frequency resolution to interpolate to

required

Returns:

Type Description
REAL8FrequencySeries

lal.REAL8FrequencySeries, the interpolated PSD

Source code in sgnligo/transforms/whiten.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
def interpolate_psd(
    psd: lal.REAL8FrequencySeries, deltaF: int
) -> lal.REAL8FrequencySeries:
    """Interpolates a PSD to a target frequency resolution.

    Args:
        psd:
            lal.REAL8FrequencySeries, the PSD to interpolate
        deltaF:
            int, the target frequency resolution to interpolate to

    Returns:
        lal.REAL8FrequencySeries, the interpolated PSD

    """
    # no-op?
    if deltaF == psd.deltaF:
        return psd

    # interpolate log(PSD) with cubic spline.  note that the PSD is
    # clipped at 1e-300 to prevent nan's in the interpolator (which
    # doesn't seem to like the occasional sample being -inf)
    psd_data = psd.data.data
    psd_data = np.where(psd_data, psd_data, 1e-300)
    f = psd.f0 + np.arange(len(psd_data)) * psd.deltaF
    interp = interpolate.splrep(f, np.log(psd_data), s=0)
    f = (
        psd.f0
        + np.arange(round((len(psd_data) - 1) * psd.deltaF / deltaF) + 1) * deltaF
    )
    psd_data = np.exp(interpolate.splev(f, interp, der=0))

    # return result
    psd = lal.CreateREAL8FrequencySeries(
        name=psd.name,
        epoch=psd.epoch,
        f0=psd.f0,
        deltaF=deltaF,
        sampleUnits=psd.sampleUnits,
        length=len(psd_data),
    )
    psd.data.data = psd_data

    return psd