ddx 0.6.0
Libary for domain-decomposition methods for polarizable continuum models
ddx_harmonics.f90
Go to the documentation of this file.
1
14
17
18! Get all compile-time definitions
20
21! Disable implicit types
22implicit none
23
24contains
25
37subroutine ylmscale(p, vscales, v4pi2lp1, vscales_rel)
38 ! Input
39 integer, intent(in) :: p
40 ! Output
41 real(dp), intent(out) :: vscales((p+1)**2), v4pi2lp1(p+1), &
42 & vscales_rel((p+1)**2)
43 ! Local variables
44 real(dp) :: tmp, twolp1
45 integer :: l, ind, m
46 twolp1 = one
47 do l = 0, p
48 ! m = 0
49 ind = l*l + l + 1
50 tmp = fourpi / twolp1
51 v4pi2lp1(l+1) = tmp
52 tmp = sqrt(tmp)
53 vscales_rel(ind) = tmp
54 vscales(ind) = one / tmp
55 twolp1 = twolp1 + two
56 tmp = vscales(ind) * sqrt2
57 ! m != 0
58 do m = 1, l
59 tmp = -tmp / sqrt(dble((l-m+1)*(l+m)))
60 vscales(ind+m) = tmp
61 vscales(ind-m) = tmp
62 vscales_rel(ind+m) = tmp * v4pi2lp1(l+1)
63 vscales_rel(ind-m) = vscales_rel(ind+m)
64 end do
65 end do
66end subroutine ylmscale
67
80subroutine fmm_constants(dmax, pm, pl, vcnk, m2l_ztranslate_coef, &
81 & m2l_ztranslate_adj_coef)
82 ! Inputs
83 integer, intent(in) :: dmax, pm, pl
84 ! Outputs
85 real(dp), intent(out) :: vcnk((2*dmax+1)*(dmax+1)), &
86 & m2l_ztranslate_coef(pm+1, pl+1, pl+1), &
87 & m2l_ztranslate_adj_coef(pl+1, pl+1, pm+1)
88 ! Local variables
89 integer :: i, indi, j, k, n, indjn
90 real(dp) :: tmp1
91 ! Compute combinatorial numbers C_n^k for n=0..dmax
92 ! C_0^0 = 1
93 vcnk(1) = one
94 do i = 2, 2*dmax+1
95 ! Offset to the C_{i-2}^{i-2}, next item to be stored is C_{i-1}^0
96 indi = (i-1) * i / 2
97 ! C_{i-1}^0 = 1
98 vcnk(indi+1) = one
99 ! C_{i-1}^{i-1} = 1
100 vcnk(indi+i) = one
101 ! C_{i-1}^{j-1} = C_{i-2}^{j-1} + C_{i-2}^{j-2}
102 ! Offset to C_{i-3}^{i-3} is indi-i+1
103 do j = 2, i-1
104 vcnk(indi+j) = vcnk(indi-i+j+1) + vcnk(indi-i+j)
105 end do
106 end do
107 ! Get square roots of C_n^k. sqrt(one) is one, so no need to update C_n^0
108 ! and C_n^n
109 do i = 3, 2*dmax+1
110 indi = (i-1) * i / 2
111 do j = 2, i-1
112 vcnk(indi+j) = sqrt(vcnk(indi+j))
113 end do
114 end do
115 ! Fill in m2l_ztranslate_coef and m2l_ztranslate_adj_coef
116 do j = 0, pl
117 do k = 0, j
118 tmp1 = one
119 do n = k, pm
120 indjn = (j+n)*(j+n+1)/2 + 1
121 m2l_ztranslate_coef(n-k+1, k+1, j-k+1) = &
122 & tmp1 * vcnk(indjn+j-k) * vcnk(indjn+j+k)
123 m2l_ztranslate_adj_coef(j-k+1, k+1, n-k+1) = &
124 & m2l_ztranslate_coef(n-k+1, k+1, j-k+1)
125 tmp1 = -tmp1
126 end do
127 end do
128 end do
129end subroutine fmm_constants
130
142subroutine carttosph(x, rho, ctheta, stheta, cphi, sphi)
143 ! Input
144 real(dp), intent(in) :: x(3)
145 ! Output
146 real(dp), intent(out) :: rho, ctheta, stheta, cphi, sphi
147 ! Local variables
148 real(dp) :: max12, ssq12
149 ! Check x(1:2) = 0
150 if ((x(1) .eq. zero) .and. (x(2) .eq. zero)) then
151 rho = abs(x(3))
152 ctheta = sign(one, x(3))
153 stheta = zero
154 cphi = one
155 sphi = zero
156 return
157 end if
158 ! In other situations use sum-of-scaled-squares technique
159 ! Get norm of x(1:2) and cphi with sphi outputs
160 if (abs(x(2)) .gt. abs(x(1))) then
161 max12 = abs(x(2))
162 ssq12 = one + (x(1)/x(2))**2
163 else
164 max12 = abs(x(1))
165 ssq12 = one + (x(2)/x(1))**2
166 end if
167 stheta = max12 * sqrt(ssq12)
168 cphi = x(1) / stheta
169 sphi = x(2) / stheta
170 ! Then compute rho, ctheta and stheta outputs
171 if (abs(x(3)) .gt. max12) then
172 rho = one + ssq12*(max12/x(3))**2
173 rho = abs(x(3)) * sqrt(rho)
174 stheta = stheta / rho
175 ctheta = x(3) / rho
176 else
177 rho = ssq12 + (x(3)/max12)**2
178 rho = max12 * sqrt(rho)
179 stheta = stheta / rho
180 ctheta = x(3) / rho
181 end if
182end subroutine carttosph
183
214subroutine ylmbas(x, rho, ctheta, stheta, cphi, sphi, p, vscales, vylm, vplm, &
215 & vcos, vsin)
216 ! Inputs
217 integer, intent(in) :: p
218 real(dp), intent(in) :: x(3)
219 real(dp), intent(in) :: vscales((p+1)**2)
220 ! Outputs
221 real(dp), intent(out) :: rho, ctheta, stheta, cphi, sphi
222 real(dp), intent(out) :: vylm((p+1)**2), vplm((p+1)**2)
223 real(dp), intent(out) :: vcos(p+1), vsin(p+1)
224 ! Local variables
225 integer :: l, m, ind
226 real(dp) :: max12, ssq12, tmp
227 ! Get rho cos(theta), sin(theta), cos(phi) and sin(phi) from the cartesian
228 ! coordinates of x. To support full range of inputs we do it via a scale
229 ! and a sum of squares technique.
230 ! At first we compute x(1)**2 + x(2)**2
231 if (x(1) .eq. zero) then
232 max12 = abs(x(2))
233 ssq12 = one
234 else if (abs(x(2)) .gt. abs(x(1))) then
235 max12 = abs(x(2))
236 ssq12 = one + (x(1)/x(2))**2
237 else
238 max12 = abs(x(1))
239 ssq12 = one + (x(2)/x(1))**2
240 end if
241 ! Then we compute rho
242 if (x(3) .eq. zero) then
243 rho = max12 * sqrt(ssq12)
244 else if (abs(x(3)) .gt. max12) then
245 rho = one + ssq12 *(max12/x(3))**2
246 rho = abs(x(3)) * sqrt(rho)
247 else
248 rho = ssq12 + (x(3)/max12)**2
249 rho = max12 * sqrt(rho)
250 end if
251 ! In case x=0 just exit without setting any other variable
252 if (rho .eq. zero) then
253 return
254 end if
255 ! Length of a vector x(1:2)
256 stheta = max12 * sqrt(ssq12)
257 ! Case x(1:2) != 0
258 if (stheta .ne. zero) then
259 ! Normalize cphi and sphi
260 cphi = x(1) / stheta
261 sphi = x(2) / stheta
262 ! Normalize ctheta and stheta
263 ctheta = x(3) / rho
264 stheta = stheta / rho
265 ! Treat easy cases
266 select case(p)
267 case (0)
268 vylm(1) = vscales(1)
269 return
270 case (1)
271 vylm(1) = vscales(1)
272 vylm(2) = - vscales(4) * stheta * sphi
273 vylm(3) = vscales(3) * ctheta
274 vylm(4) = - vscales(4) * stheta * cphi
275 return
276 end select
277 ! Evaluate associated Legendre polynomials, size of temporary workspace
278 ! here needs only (p+1) elements so we use vcos as a temporary
279 call polleg_work(ctheta, stheta, p, vplm, vcos)
280 ! Evaluate cos(m*phi) and sin(m*phi) arrays
281 call trgev(cphi, sphi, p, vcos, vsin)
282 ! Construct spherical harmonics
283 ! l = 0
284 vylm(1) = vscales(1)
285 ! l = 1
286 vylm(2) = - vscales(4) * stheta * sphi
287 vylm(3) = vscales(3) * ctheta
288 vylm(4) = - vscales(4) * stheta * cphi
289 ind = 3
290 do l = 2, p
291 ! Offset of a Y_l^0 harmonic in vplm and vylm arrays
292 ind = ind + 2*l
293 !l**2 + l + 1
294 ! m = 0 implicitly uses `vcos(1) = 1`
295 vylm(ind) = vscales(ind) * vplm(ind)
296 do m = 1, l
297 ! only P_l^m for non-negative m is used/defined
298 tmp = vplm(ind+m) * vscales(ind+m)
299 ! m > 0
300 vylm(ind+m) = tmp * vcos(m+1)
301 ! m < 0
302 vylm(ind-m) = tmp * vsin(m+1)
303 end do
304 end do
305 ! Case of x(1:2) = 0 and x(3) != 0
306 else
307 ! Set spherical coordinates
308 cphi = one
309 sphi = zero
310 ctheta = sign(one, x(3))
311 stheta = zero
312 ! Set output arrays vcos and vsin
313 vcos = one
314 vsin = zero
315 ! Evaluate spherical harmonics. P_l^m = 0 for m > 0. In the case m = 0
316 ! it depends if l is odd or even. Additionally, vcos = one and vsin =
317 ! zero for all elements
318 vylm = zero
319 vplm = zero
320 do l = 0, p, 2
321 ind = l**2 + l + 1
322 ! only case m = 0
323 vplm(ind) = one
324 vylm(ind) = vscales(ind)
325 end do
326 do l = 1, p, 2
327 ind = l**2 + l + 1
328 ! only case m = 0
329 vplm(ind) = ctheta
330 vylm(ind) = ctheta * vscales(ind)
331 end do
332 end if
333end subroutine ylmbas
334
348subroutine trgev(cphi, sphi, p, vcos, vsin)
349 ! Inputs
350 integer, intent(in) :: p
351 real(dp), intent(in) :: cphi, sphi
352 ! Output
353 real(dp), intent(out) :: vcos(p+1), vsin(p+1)
354 ! Local variables
355 integer :: m
356 real(dp) :: c4phi, s4phi
357 !! Treat values of p from 0 to 3 differently
358 select case(p)
359 ! Do nothing if p < 0
360 case (:-1)
361 return
362 ! p = 0
363 case (0)
364 vcos(1) = one
365 vsin(1) = zero
366 return
367 ! p = 1
368 case (1)
369 vcos(1) = one
370 vsin(1) = zero
371 vcos(2) = cphi
372 vsin(2) = sphi
373 return
374 ! p = 2
375 case (2)
376 vcos(1) = one
377 vsin(1) = zero
378 vcos(2) = cphi
379 vsin(2) = sphi
380 vcos(3) = cphi**2 - sphi**2
381 vsin(3) = 2 * cphi * sphi
382 return
383 ! p = 3
384 case (3)
385 vcos(1) = one
386 vsin(1) = zero
387 vcos(2) = cphi
388 vsin(2) = sphi
389 vcos(3) = cphi**2 - sphi**2
390 vsin(3) = 2 * cphi * sphi
391 vcos(4) = vcos(3)*cphi - vsin(3)*sphi
392 vsin(4) = vcos(3)*sphi + vsin(3)*cphi
393 return
394 ! p >= 4
395 case default
396 vcos(1) = one
397 vsin(1) = zero
398 vcos(2) = cphi
399 vsin(2) = sphi
400 vcos(3) = cphi**2 - sphi**2
401 vsin(3) = 2 * cphi * sphi
402 vcos(4) = vcos(3)*cphi - vsin(3)*sphi
403 vsin(4) = vcos(3)*sphi + vsin(3)*cphi
404 vcos(5) = vcos(3)**2 - vsin(3)**2
405 vsin(5) = 2 * vcos(3) * vsin(3)
406 c4phi = vcos(5)
407 s4phi = vsin(5)
408 end select
409 ! Define cos(m*phi) and sin(m*phi) recurrently 4 values at a time
410 do m = 6, p-2, 4
411 vcos(m:m+3) = vcos(m-4:m-1)*c4phi - vsin(m-4:m-1)*s4phi
412 vsin(m:m+3) = vcos(m-4:m-1)*s4phi + vsin(m-4:m-1)*c4phi
413 end do
414 ! Work with leftover
415 select case(m-p)
416 case (-1)
417 vcos(p-1) = vcos(p-2)*cphi - vsin(p-2)*sphi
418 vsin(p-1) = vcos(p-2)*sphi + vsin(p-2)*cphi
419 vcos(p) = vcos(p-2)*vcos(3) - vsin(p-2)*vsin(3)
420 vsin(p) = vcos(p-2)*vsin(3) + vsin(p-2)*vcos(3)
421 vcos(p+1) = vcos(p-2)*vcos(4) - vsin(p-2)*vsin(4)
422 vsin(p+1) = vcos(p-2)*vsin(4) + vsin(p-2)*vcos(4)
423 case (0)
424 vcos(p) = vcos(p-1)*cphi - vsin(p-1)*sphi
425 vsin(p) = vcos(p-1)*sphi + vsin(p-1)*cphi
426 vcos(p+1) = vcos(p-1)*vcos(3) - vsin(p-1)*vsin(3)
427 vsin(p+1) = vcos(p-1)*vsin(3) + vsin(p-1)*vcos(3)
428 case (1)
429 vcos(p+1) = vcos(p)*cphi - vsin(p)*sphi
430 vsin(p+1) = vcos(p)*sphi + vsin(p)*cphi
431 end select
432end subroutine trgev
433
452subroutine polleg(ctheta, stheta, p, vplm)
453 ! Inputs
454 integer, intent(in) :: p
455 real(dp), intent(in) :: ctheta, stheta
456 ! Outputs
457 real(dp), intent(out) :: vplm((p+1)**2)
458 ! Temporary workspace
459 real(dp) :: work(p+1)
460 ! Call corresponding work routine
461 call polleg_work(ctheta, stheta, p, vplm, work)
462end subroutine polleg
463
482subroutine polleg_work(ctheta, stheta, p, vplm, work)
483 ! Inputs
484 integer, intent(in) :: p
485 real(dp), intent(in) :: ctheta, stheta
486 ! Outputs
487 real(dp), intent(out) :: vplm((p+1)**2)
488 ! Temporary workspace
489 real(dp), intent(out) :: work(p+1)
490 ! Local variables
491 integer :: m, l
492 real(dp) :: tmp1, tmp2, tmp3, tmp4, pl2m, pl1m, plm, pmm
493 ! Easy cases
494 select case (p)
495 case (0)
496 vplm(1) = one
497 return
498 case (1)
499 vplm(1) = one
500 vplm(3) = ctheta
501 vplm(4) = -stheta
502 return
503 case (2)
504 vplm(1) = one
505 vplm(3) = ctheta
506 vplm(4) = -stheta
507 tmp1 = three * stheta
508 tmp2 = ctheta * ctheta
509 vplm(7) = 1.5d0*tmp2 - pt5
510 vplm(8) = -tmp1 * ctheta
511 vplm(9) = tmp1 * stheta
512 return
513 case (3)
514 vplm(1) = one
515 vplm(3) = ctheta
516 vplm(4) = -stheta
517 tmp1 = three * stheta
518 tmp2 = ctheta * ctheta
519 vplm(7) = 1.5d0*tmp2 - pt5
520 vplm(8) = -tmp1 * ctheta
521 vplm(9) = tmp1 * stheta
522 tmp3 = 2.5d0 * tmp2 - pt5
523 vplm(13) = tmp3*ctheta - ctheta
524 vplm(14) = -tmp1 * tmp3
525 tmp4 = -5d0 * stheta
526 vplm(15) = tmp4 * vplm(8)
527 vplm(16) = tmp4 * vplm(9)
528 return
529 end select
530 ! Now p >= 4
531 ! Precompute temporary values
532 do l = 1, p
533 work(l) = dble(2*l-1) * ctheta
534 end do
535 ! Save P_m^m for the inital loop m=0 which is P_0^0 now
536 pmm = one
537 ! Case m < p
538 do m = 0, p-1
539 ! P_{l-2}^m which is P_m^m now
540 pl2m = pmm
541 ! P_m^m
542 vplm((m+1)**2) = pmm
543 ! Temporary to reduce number of operations
544 tmp1 = dble(2*m+1) * pmm
545 ! Update P_m^m for the next iteration
546 pmm = -stheta * tmp1
547 ! P_{l-1}^m which is P_{m+1}^m now
548 pl1m = ctheta * tmp1
549 ! P_{m+1}^m
550 vplm((m+1)*(m+3)) = pl1m
551 ! P_l^m for l>m+1
552 do l = m+2, p
553 plm = work(l)*pl1m - dble(l+m-1)*pl2m
554 plm = plm / dble(l-m)
555 vplm(l*l+l+1+m) = plm
556 ! Update P_{l-2}^m and P_{l-1}^m for the next iteration
557 pl2m = pl1m
558 pl1m = plm
559 end do
560 end do
561 ! Case m=p requires only to store P_m^m
562 vplm((p+1)**2) = pmm
563end subroutine polleg_work
564
565!
566! Subroutine to compute the Modified Spherical Bessel function of the first kind
567! @param[in] lmax : Data type
568! @param[in] argument : Argument of Bessel function
569! @param[out] SI : Modified Bessel function of the first kind
570! @param[out] DI : Derivative of modified Bessel function of the first kind
571! @param[out] ierr : error code
572subroutine modified_spherical_bessel_first_kind(lmax, argument, SI, DI, work)
573 use complex_bessel
574 !! Inputs
575 integer, intent(in) :: lmax
576 real(dp), intent(in) :: argument
577 !! Outputs
578 real(dp), dimension(0:lmax), intent(out) :: SI, DI
579 !! Temporary workspace
580 complex(dp), dimension(0:max(1, lmax)), intent(out) :: work
581 !! Local Variables
582 ! Complex_SI : Modified Bessel functions of the first kind
583 ! argument : Argument for Complex_SI
584 ! scaling_factor : sqrt(pi/2x)
585 ! fnu : Starting argument for I_J(x)
586 ! NZ : Number of components set to zero due to underflow
587 ! ierr : Erro indicator for I_J(x)
588 ! l : l=0,...,lmax
589 complex(dp) :: complex_argument
590 real(dp) :: scaling_factor
591 real(dp), parameter :: fnu = 1.5d0
592 integer :: NZ, ierr, l
593 ! If argument is zero then SI is e_1, need to find what is the value of DI
594 if (argument .eq. zero) then
595 si = zero
596 si(0) = one
597 di = zero ! ???
598 return
599 end if
600 ! Compute I_(0.5+J) for J:1,...,lmax
601 ! NOTE: cbesi computes I_(FNU+J-1)
602 !fnu = 1.5
603 scaling_factor = sqrt(pi/(2*argument))
604 ! NOTE: Complex argument is required to call I_J(x)
605 complex_argument = argument
606 ! Compute for l = 0
607 call cbesi(complex_argument, pt5, 1, 1, work(0), nz, ierr)
608 if (ierr .ne. 0) stop 'Error in computing Bessel function of first kind'
609 ! Compute for l = 1,...,lmax
610 if (lmax .gt. 0) then
611 call cbesi(complex_argument, fnu, 1, lmax, work(1:lmax), nz, ierr)
612 if (ierr .ne. 0) stop 'Error in computing Bessel function of first kind'
613 ! l=1 is needed for DI(0)
614 else
615 call cbesi(complex_argument, fnu, 1, 1, work(1), nz, ierr)
616 if (ierr .ne. 0) stop 'Error in computing Bessel function of first kind'
617 end if
618 ! Store the real part of the complex Bessel functions
619 si = real(work(0:lmax))
620 ! Converting Modified Bessel to Spherical Modified Bessel
621 si = scaling_factor * si
622 ! Computation of Derivatives of SI
623 di(0) = scaling_factor * real(work(1))
624 do l = 1, lmax
625 di(l)= si(l-1) - ((l+1.0d0)*si(l))/argument
626 end do
627end subroutine modified_spherical_bessel_first_kind
628
629!
630! Subroutine to compute the Modified Spherical Bessel function of the second kind
631! @param[in] lmax : Size
632! @param[in] argument : Argument of Bessel function
633! @param[out] SK : Modified Bessel function of the second kind
634! @param[out] DK : Derivative of modified Bessel function of the second kind
635subroutine modified_spherical_bessel_second_kind(lmax, argument, SK, DK, work)
636 use complex_bessel
637 !! Inputs
638 integer , intent(in) :: lmax
639 real(dp), intent(in) :: argument
640 !! Outputs
641 real(dp), dimension(0:lmax), intent(out) :: SK, DK
642 !! Temporary workspace
643 complex(dp), dimension(0:max(1, lmax)), intent(out) :: work
644 ! Local Variables
645 ! Complex_SK : Modified Bessel functions of the second kind
646 ! argument : Argument for Complex_SK
647 ! scaling_factor : sqrt(pi/2x)
648 ! fnu : Starting argument for K_J(x)
649 ! NZ : Number of components set to zero due to underflow
650 ! ierr : Error indicator for K_J(x)
651 ! l : l=0,...,lmax
652 complex(dp) :: complex_argument
653 real(dp) :: scaling_factor
654 real(dp), parameter :: fnu=1.5d0
655 integer :: NZ, ierr, l
656 ! Compute K_(0.5+J) for J:1,...,lmax
657 ! NOTE: cbesk computes K_(FNU+J-1)
658 scaling_factor = sqrt(2/(pi*argument))
659 ! NOTE: Complex argument is required to call K_J(x)
660 complex_argument = argument
661 ! Compute for l = 0
662 call cbesk(complex_argument, pt5, 1, 1, work(0), nz, ierr)
663 if (ierr .ne. 0) stop 'Error in computing Bessel function of second kind'
664 ! Compute for l = 1,...,lmax
665 if (lmax .gt. 0) then
666 call cbesk(complex_argument, fnu, 1, lmax, work(1:lmax), nz, ierr)
667 if (ierr .ne. 0) stop 'Error in computing Bessel function of second kind'
668 ! l=1 is needed for DK(0)
669 else
670 call cbesk(complex_argument, fnu, 1, 1, work(1), nz, ierr)
671 if (ierr .ne. 0) stop 'Error in computing Bessel function of second kind'
672 end if
673 ! Store the real part of the complex Bessel functions
674 sk = real(work(0:lmax))
675 ! Converting Modified Bessel to Spherical Modified Bessel
676 sk = scaling_factor * sk
677 ! Computation of Derivatives of SK
678 dk(0) = -scaling_factor * real(work(1))
679 do l = 1, lmax
680 dk(l) = -sk(l-1) - ((l+1.0d0)*sk(l))/argument
681 end do
682end subroutine modified_spherical_bessel_second_kind
683
710subroutine fmm_p2m(c, src_q, dst_r, p, vscales, beta, dst_m)
711 ! Inputs
712 integer, intent(in) :: p
713 real(dp), intent(in) :: c(3), src_q, dst_r, vscales((p+1)**2), beta
714 ! Output
715 real(dp), intent(inout) :: dst_m((p+1)**2)
716 ! Workspace
717 real(dp) :: work(2*(p+1)*(p+2))
718 ! Call corresponding work routine
719 call fmm_p2m_work(c, src_q, dst_r, p, vscales, beta, dst_m, work)
720end subroutine fmm_p2m
721
749subroutine fmm_p2m_work(c, src_q, dst_r, p, vscales, beta, dst_m, work)
750 ! Inputs
751 integer, intent(in) :: p
752 real(dp), intent(in) :: c(3), src_q, dst_r, vscales((p+1)**2), beta
753 ! Output
754 real(dp), intent(inout) :: dst_m((p+1)**2)
755 ! Workspace
756 real(dp), intent(out), target :: work(2*(p+1)*(p+2))
757 ! Local variables
758 real(dp) :: rho, ctheta, stheta, cphi, sphi, t, rcoef
759 integer :: n, ind, vylm1, vylm2, vplm1, vplm2, vcos1, vcos2, vsin1, vsin2
760 ! In case src_q is zero just scale output properly
761 if (src_q .eq. zero) then
762 ! Zero init output if beta is also zero
763 if (beta .eq. zero) then
764 dst_m = zero
765 ! Scale output by beta otherwise
766 else
767 dst_m = beta * dst_m
768 end if
769 ! Exit subroutine
770 return
771 end if
772 ! Now src_q is non-zero
773 ! Mark first and last elements of all work subarrays
774 vylm1 = 1
775 vylm2 = (p+1)**2
776 vplm1 = vylm2 + 1
777 vplm2 = 2 * vylm2
778 vcos1 = vplm2 + 1
779 vcos2 = vcos1 + p
780 vsin1 = vcos2 + 1
781 vsin2 = vsin1 + p
782 ! Get radius and values of spherical harmonics
783 call ylmbas(c, rho, ctheta, stheta, cphi, sphi, p, vscales, &
784 & work(vylm1:vylm2), work(vplm1:vplm2), work(vcos1:vcos2), &
785 & work(vsin1:vsin2))
786 ! Harmonics are available only if rho > 0
787 if (rho .ne. zero) then
788 rcoef = rho / dst_r
789 t = src_q / dst_r
790 ! Ignore input `dst_m` in case of a zero scaling factor beta
791 if (beta .eq. zero) then
792 do n = 0, p
793 ind = n*n + n + 1
794 ! Array vylm is in the beginning of work array
795 dst_m(ind-n:ind+n) = t * work(ind-n:ind+n)
796 t = t * rcoef
797 end do
798 ! Update `dst_m` otherwise
799 else
800 do n = 0, p
801 ind = n*n + n + 1
802 ! Array vylm is in the beginning of work array
803 dst_m(ind-n:ind+n) = beta*dst_m(ind-n:ind+n) + &
804 & t*work(ind-n:ind+n)
805 t = t * rcoef
806 end do
807 end if
808 ! Naive case of rho = 0
809 else
810 ! Ignore input `dst_m` in case of a zero scaling factor beta
811 if (beta .eq. zero) then
812 dst_m(1) = src_q / dst_r / sqrt4pi
813 dst_m(2:) = zero
814 ! Update `m` otherwise
815 else
816 dst_m(1) = beta*dst_m(1) + src_q/dst_r/sqrt4pi
817 dst_m(2:) = beta * dst_m(2:)
818 end if
819 end if
820end subroutine fmm_p2m_work
821
848subroutine fmm_m2p(c, src_r, p, vscales_rel, alpha, src_m, beta, dst_v)
849 ! Inputs
850 integer, intent(in) :: p
851 real(dp), intent(in) :: c(3), src_r, vscales_rel((p+1)*(p+1)), alpha, &
852 & src_m((p+1)*(p+1)), beta
853 ! Output
854 real(dp), intent(inout) :: dst_v
855 ! Temporary workspace
856 real(dp) :: work(p+1)
857 ! Call corresponding work routine
858 call fmm_m2p_work(c, src_r, p, vscales_rel, alpha, src_m, beta, dst_v, &
859 & work)
860end subroutine fmm_m2p
861
891subroutine fmm_m2p_work(c, src_r, p, vscales_rel, alpha, src_m, &
892 & beta, dst_v, work)
893 ! Inputs
894 integer, intent(in) :: p
895 real(dp), intent(in) :: c(3), src_r, vscales_rel((p+1)*(p+1)), alpha, &
896 & src_m((p+1)*(p+1)), beta
897 ! Output
898 real(dp), intent(inout) :: dst_v
899 ! Workspace
900 real(dp), intent(out), target :: work(p+1)
901 ! Local variables
902 real(dp) :: rho, ctheta, stheta, cphi, sphi, rcoef, t, tmp, tmp1, tmp2, &
903 & tmp3, max12, ssq12, pl2m, pl1m, plm, pmm, cmphi, smphi, ylm
904 integer :: l, m, indl
905 ! Scale output
906 if (beta .eq. zero) then
907 dst_v = zero
908 else
909 dst_v = beta * dst_v
910 end if
911 ! In case of zero alpha nothing else is required no matter what is the
912 ! value of the induced potential
913 if (alpha .eq. zero) then
914 return
915 end if
916 ! Get spherical coordinates
917 if (c(1) .eq. zero) then
918 max12 = abs(c(2))
919 ssq12 = one
920 else if (abs(c(2)) .gt. abs(c(1))) then
921 max12 = abs(c(2))
922 ssq12 = one + (c(1)/c(2))**2
923 else
924 max12 = abs(c(1))
925 ssq12 = one + (c(2)/c(1))**2
926 end if
927 ! Then we compute rho
928 if (c(3) .eq. zero) then
929 rho = max12 * sqrt(ssq12)
930 else if (abs(c(3)) .gt. max12) then
931 rho = one + ssq12 *(max12/c(3))**2
932 rho = abs(c(3)) * sqrt(rho)
933 else
934 rho = ssq12 + (c(3)/max12)**2
935 rho = max12 * sqrt(rho)
936 end if
937 ! In case of a singularity (rho=zero) induced potential is infinite and is
938 ! not taken into account.
939 if (rho .eq. zero) then
940 return
941 end if
942 ! Compute the actual induced potential
943 rcoef = src_r / rho
944 ! Length of a vector x(1:2)
945 stheta = max12 * sqrt(ssq12)
946 ! Case x(1:2) != 0
947 if (stheta .ne. zero) then
948 ! Normalize cphi and sphi
949 cphi = c(1) / stheta
950 sphi = c(2) / stheta
951 ! Normalize ctheta and stheta
952 ctheta = c(3) / rho
953 stheta = stheta / rho
954 ! Treat easy cases
955 select case(p)
956 case (0)
957 ! l = 0
958 dst_v = dst_v + alpha*rcoef*vscales_rel(1)*src_m(1)
959 return
960 case (1)
961 ! l = 0
962 tmp = src_m(1) * vscales_rel(1)
963 ! l = 1
964 tmp2 = ctheta * src_m(3) * vscales_rel(3)
965 tmp3 = vscales_rel(4) * stheta
966 tmp2 = tmp2 - tmp3*sphi*src_m(2)
967 tmp2 = tmp2 - tmp3*cphi*src_m(4)
968 dst_v = dst_v + alpha*rcoef*(tmp+rcoef*tmp2)
969 return
970 end select
971 ! Now p>1
972 ! Precompute alpha*rcoef^{l+1}
973 work(1) = alpha * rcoef
974 do l = 1, p
975 work(l+1) = rcoef * work(l)
976 end do
977 ! Case m = 0
978 ! P_{l-2}^m which is P_0^0 now
979 pl2m = one
980 dst_v = dst_v + work(1)*src_m(1)*vscales_rel(1)
981 ! Update P_m^m for the next iteration
982 pmm = -stheta
983 ! P_{l-1}^m which is P_{m+1}^m now
984 pl1m = ctheta
985 ylm = pl1m * vscales_rel(3)
986 dst_v = dst_v + work(2)*src_m(3)*ylm
987 ! P_l^m for l>m+1
988 do l = 2, p
989 plm = dble(2*l-1)*ctheta*pl1m - dble(l-1)*pl2m
990 plm = plm / dble(l)
991 ylm = plm * vscales_rel(l*l+l+1)
992 dst_v = dst_v + work(l+1)*src_m(l*l+l+1)*ylm
993 ! Update P_{l-2}^m and P_{l-1}^m for the next iteration
994 pl2m = pl1m
995 pl1m = plm
996 end do
997 ! Prepare cos(m*phi) and sin(m*phi) for m=1
998 cmphi = cphi
999 smphi = sphi
1000 ! Case 0<m<p
1001 do m = 1, p-1
1002 ! P_{l-2}^m which is P_m^m now
1003 pl2m = pmm
1004 ylm = pmm * vscales_rel((m+1)**2)
1005 tmp1 = cmphi*src_m((m+1)**2) + smphi*src_m(m*m+1)
1006 dst_v = dst_v + work(m+1)*ylm*tmp1
1007 ! Temporary to reduce number of operations
1008 tmp1 = dble(2*m+1) * pmm
1009 ! Update P_m^m for the next iteration
1010 pmm = -stheta * tmp1
1011 ! P_{l-1}^m which is P_{m+1}^m now
1012 pl1m = ctheta * tmp1
1013 ylm = pl1m * vscales_rel((m+1)*(m+3))
1014 tmp1 = cmphi*src_m((m+1)*(m+3)) + smphi*src_m((m+1)*(m+2)+1-m)
1015 dst_v = dst_v + work(m+2)*ylm*tmp1
1016 ! P_l^m for l>m+1
1017 do l = m+2, p
1018 plm = dble(2*l-1)*ctheta*pl1m - dble(l+m-1)*pl2m
1019 plm = plm / dble(l-m)
1020 ylm = plm * vscales_rel(l*l+l+1+m)
1021 tmp1 = cmphi*src_m(l*l+l+1+m) + smphi*src_m(l*l+l+1-m)
1022 dst_v = dst_v + work(l+1)*ylm*tmp1
1023 ! Update P_{l-2}^m and P_{l-1}^m for the next iteration
1024 pl2m = pl1m
1025 pl1m = plm
1026 end do
1027 ! Update cos(m*phi) and sin(m*phi) for the next iteration
1028 tmp1 = cmphi
1029 cmphi = cmphi*cphi - smphi*sphi
1030 smphi = tmp1*sphi + smphi*cphi
1031 end do
1032 ! Case m=p requires only to use P_m^m
1033 ylm = pmm * vscales_rel((p+1)**2)
1034 tmp1 = cmphi*src_m((p+1)**2) + smphi*src_m(p*p+1)
1035 dst_v = dst_v + work(p+1)*ylm*tmp1
1036 ! Case of x(1:2) = 0 and x(3) != 0
1037 else
1038 ! In this case Y_l^m = 0 for m != 0, so only case m = 0 is taken into
1039 ! account. Y_l^0 = ctheta^l in this case where ctheta is either +1 or
1040 ! -1. So, we copy sign(ctheta) into rcoef. But before that we
1041 ! initialize alpha/r factor for l=0 case without taking possible sign
1042 ! into account.
1043 t = alpha * rcoef
1044 if (c(3) .lt. zero) then
1045 rcoef = -rcoef
1046 end if
1047 ! Proceed with accumulation of a potential
1048 indl = 1
1049 do l = 0, p
1050 ! Index of Y_l^0
1051 indl = indl + 2*l
1052 ! Add 4*pi/(2*l+1)*rcoef^{l+1}*Y_l^0 contribution
1053 dst_v = dst_v + t*src_m(indl)*vscales_rel(indl)
1054 ! Update t
1055 t = t * rcoef
1056 end do
1057 end if
1058end subroutine fmm_m2p_work
1059
1086subroutine fmm_m2p_bessel(c, src_r, p, vscales, alpha, src_m, beta, dst_v)
1087 use complex_bessel
1088 ! Inputs
1089 integer, intent(in) :: p
1090 real(dp), intent(in) :: c(3), src_r, vscales((p+1)*(p+1)), alpha, &
1091 & src_m((p+1)*(p+1)), beta
1092 ! Output
1093 real(dp), intent(inout) :: dst_v
1094 ! Temporary workspace
1095 complex(dp) :: work_complex(max(2, p+1))
1096 real(dp) :: work(p+1)
1097 ! Local variables
1098 real(dp) :: src_sk(p+1)
1099 ! Call corresponding work routine
1100 call modified_spherical_bessel_second_kind(p, src_r, src_sk, work, &
1101 & work_complex)
1102 call fmm_m2p_bessel_work(c, p, vscales, src_sk, alpha, src_m, beta, &
1103 & dst_v, work_complex, work)
1104end subroutine fmm_m2p_bessel
1105
1132subroutine fmm_m2p_bessel_work(c, p, vscales, SK_ri, alpha, src_m, beta, &
1133 & dst_v, work_complex, work)
1134 use complex_bessel
1135 !! Inputs
1136 integer, intent(in) :: p
1137 real(dp), intent(in) :: c(3), vscales((p+1)*(p+1)), alpha, &
1138 & src_m((p+1)*(p+1)), beta, SK_ri(p+1)
1139 !! Output
1140 real(dp), intent(inout) :: dst_v
1141 !! Workspace
1142 complex(dp), intent(out) :: work_complex(p+1)
1143 real(dp), intent(out) :: work(p+1)
1144 !! Local variables
1145 real(dp) :: rho, ctheta, stheta, cphi, sphi, tmp, tmp1, tmp2, &
1146 & tmp3, max12, ssq12, pl2m, pl1m, plm, pmm, cmphi, smphi, ylm, t
1147 real(dp) :: scaling_factor
1148 complex(dp) :: complex_argument
1149 integer :: l, m, indl, ierr, NZ
1150 real(dp), parameter :: fnu=1.5d0
1151 ! Scale output
1152 if (beta .eq. zero) then
1153 dst_v = zero
1154 else
1155 dst_v = beta * dst_v
1156 end if
1157 ! In case of zero alpha nothing else is required no matter what is the
1158 ! value of the induced potential
1159 if (alpha .eq. zero) then
1160 return
1161 end if
1162 ! Get spherical coordinates
1163 if (c(1) .eq. zero) then
1164 max12 = abs(c(2))
1165 ssq12 = one
1166 else if (abs(c(2)) .gt. abs(c(1))) then
1167 max12 = abs(c(2))
1168 ssq12 = one + (c(1)/c(2))**2
1169 else
1170 max12 = abs(c(1))
1171 ssq12 = one + (c(2)/c(1))**2
1172 end if
1173 ! Then we compute rho
1174 if (c(3) .eq. zero) then
1175 rho = max12 * sqrt(ssq12)
1176 else if (abs(c(3)) .gt. max12) then
1177 rho = one + ssq12 *(max12/c(3))**2
1178 rho = abs(c(3)) * sqrt(rho)
1179 else
1180 rho = ssq12 + (c(3)/max12)**2
1181 rho = max12 * sqrt(rho)
1182 end if
1183 ! In case of a singularity (rho=zero) induced potential is infinite and is
1184 ! not taken into account.
1185 if (rho .eq. zero) then
1186 return
1187 end if
1188 ! Compute Bessel function
1189 scaling_factor = alpha * sqrt(two / (pi*rho))
1190 ! NOTE: Complex argument is required to call I_J(x)
1191 complex_argument = rho
1192 ! Compute for l = 0
1193 call cbesk(complex_argument, pt5, 1, 1, work_complex(1), nz, ierr)
1194 ! Compute for l = 1,...,p
1195 if (p .gt. 0) then
1196 call cbesk(complex_argument, fnu, 1, p, work_complex(2:p+1), nz, ierr)
1197 end if
1198 work = scaling_factor * real(work_complex) / sk_ri
1199 ! Compute the actual induced potential
1200 ! Length of a vector x(1:2)
1201 stheta = max12 * sqrt(ssq12)
1202 ! Case x(1:2) != 0
1203 if (stheta .ne. zero) then
1204 ! Normalize cphi and sphi
1205 cphi = c(1) / stheta
1206 sphi = c(2) / stheta
1207 ! Normalize ctheta and stheta
1208 ctheta = c(3) / rho
1209 stheta = stheta / rho
1210 ! Treat easy cases
1211 select case(p)
1212 case (0)
1213 ! l = 0
1214 dst_v = dst_v + work(1)*vscales(1)*src_m(1)
1215 return
1216 case (1)
1217 ! l = 0
1218 tmp = work(1) * src_m(1) * vscales(1)
1219 ! l = 1
1220 tmp2 = ctheta * src_m(3) * vscales(3)
1221 tmp3 = stheta * vscales(4)
1222 tmp2 = tmp2 - tmp3*sphi*src_m(2)
1223 tmp2 = tmp2 - tmp3*cphi*src_m(4)
1224 dst_v = dst_v + tmp + work(2)*tmp2
1225 return
1226 end select
1227 ! Now p>1
1228 ! Case m = 0
1229 ! P_{l-2}^m which is P_0^0 now
1230 pl2m = one
1231 dst_v = dst_v + work(1)*src_m(1)*vscales(1)
1232 ! Update P_m^m for the next iteration
1233 pmm = -stheta
1234 ! P_{l-1}^m which is P_{m+1}^m now
1235 pl1m = ctheta
1236 ylm = pl1m * vscales(3)
1237 dst_v = dst_v + work(2)*src_m(3)*ylm
1238 ! P_l^m for l>m+1
1239 do l = 2, p
1240 plm = dble(2*l-1)*ctheta*pl1m - dble(l-1)*pl2m
1241 plm = plm / dble(l)
1242 ylm = plm * vscales(l*l+l+1)
1243 dst_v = dst_v + work(l+1)*src_m(l*l+l+1)*ylm
1244 ! Update P_{l-2}^m and P_{l-1}^m for the next iteration
1245 pl2m = pl1m
1246 pl1m = plm
1247 end do
1248 ! Prepare cos(m*phi) and sin(m*phi) for m=1
1249 cmphi = cphi
1250 smphi = sphi
1251 ! Case 0<m<p
1252 do m = 1, p-1
1253 ! P_{l-2}^m which is P_m^m now
1254 pl2m = pmm
1255 ylm = pmm * vscales((m+1)**2)
1256 tmp1 = cmphi*src_m((m+1)**2) + smphi*src_m(m*m+1)
1257 dst_v = dst_v + work(m+1)*ylm*tmp1
1258 ! Temporary to reduce number of operations
1259 tmp1 = dble(2*m+1) * pmm
1260 ! Update P_m^m for the next iteration
1261 pmm = -stheta * tmp1
1262 ! P_{l-1}^m which is P_{m+1}^m now
1263 pl1m = ctheta * tmp1
1264 ylm = pl1m * vscales((m+1)*(m+3))
1265 tmp1 = cmphi*src_m((m+1)*(m+3)) + smphi*src_m((m+1)*(m+2)+1-m)
1266 dst_v = dst_v + work(m+2)*ylm*tmp1
1267 ! P_l^m for l>m+1
1268 do l = m+2, p
1269 plm = dble(2*l-1)*ctheta*pl1m - dble(l+m-1)*pl2m
1270 plm = plm / dble(l-m)
1271 ylm = plm * vscales(l*l+l+1+m)
1272 tmp1 = cmphi*src_m(l*l+l+1+m) + smphi*src_m(l*l+l+1-m)
1273 dst_v = dst_v + work(l+1)*ylm*tmp1
1274 ! Update P_{l-2}^m and P_{l-1}^m for the next iteration
1275 pl2m = pl1m
1276 pl1m = plm
1277 end do
1278 ! Update cos(m*phi) and sin(m*phi) for the next iteration
1279 tmp1 = cmphi
1280 cmphi = cmphi*cphi - smphi*sphi
1281 smphi = tmp1*sphi + smphi*cphi
1282 end do
1283 ! Case m=p requires only to use P_m^m
1284 ylm = pmm * vscales((p+1)**2)
1285 tmp1 = cmphi*src_m((p+1)**2) + smphi*src_m(p*p+1)
1286 dst_v = dst_v + work(p+1)*ylm*tmp1
1287 ! Case of x(1:2) = 0 and x(3) != 0
1288 else
1289 ! In this case Y_l^m = 0 for m != 0, so only case m = 0 is taken into
1290 ! account. Y_l^0 = ctheta^l in this case where ctheta is either +1 or
1291 ! -1. So, we copy sign(ctheta) into rcoef. But before that we
1292 ! initialize alpha/r factor for l=0 case without taking possible sign
1293 ! into account.
1294 t = one
1295 if (c(3) .lt. zero) then
1296 ! Proceed with accumulation of a potential
1297 indl = 1
1298 do l = 0, p
1299 ! Index of Y_l^0
1300 indl = indl + 2*l
1301 ! Y_l^0 contribution
1302 dst_v = dst_v + t*work(l+1)*src_m(indl)*vscales(indl)
1303 t = -t
1304 end do
1305 else
1306 ! Proceed with accumulation of a potential
1307 indl = 1
1308 do l = 0, p
1309 ! Index of Y_l^0
1310 indl = indl + 2*l
1311 ! Y_l^0 contribution
1312 dst_v = dst_v + work(l+1)*src_m(indl)*vscales(indl)
1313 end do
1314 end if
1315 end if
1316end subroutine fmm_m2p_bessel_work
1317
1318subroutine fmm_m2p_bessel_grad(c, src_r, p, vscales, alpha, src_m, beta, dst_g)
1319 use complex_bessel
1320 ! Inputs
1321 integer, intent(in) :: p
1322 real(dp), intent(in) :: c(3), src_r, vscales((p+2)*(p+2)), alpha, &
1323 & src_m((p+1)*(p+1)), beta
1324 ! Output
1325 real(dp), intent(inout) :: dst_g(3)
1326 ! Temporary workspace
1327 complex(dp) :: work_complex(p+2)
1328 real(dp) :: work(p+2), src_sk(p+2), src_m_grad((p+2)**2, 3)
1329 ! Call corresponding work routine
1330 call modified_spherical_bessel_second_kind(p+1, src_r, src_sk, work, &
1331 & work_complex)
1332 call fmm_m2m_bessel_grad(p, src_sk, vscales, src_m, src_m_grad)
1333 call fmm_m2p_bessel_work(c, p+1, vscales, src_sk, -alpha, src_m_grad(:, 1), &
1334 & beta, dst_g(1), work_complex, work)
1335 call fmm_m2p_bessel_work(c, p+1, vscales, src_sk, -alpha, src_m_grad(:, 2), &
1336 & beta, dst_g(2), work_complex, work)
1337 call fmm_m2p_bessel_work(c, p+1, vscales, src_sk, -alpha, src_m_grad(:, 3), &
1338 & beta, dst_g(3), work_complex, work)
1339end subroutine fmm_m2p_bessel_grad
1340
1366subroutine fmm_m2p_adj(c, src_q, dst_r, p, vscales_rel, beta, dst_m)
1367 ! Inputs
1368 integer, intent(in) :: p
1369 real(dp), intent(in) :: c(3), src_q, dst_r, vscales_rel((p+1)*(p+1)), beta
1370 ! Output
1371 real(dp), intent(inout) :: dst_m((p+1)**2)
1372 ! Workspace
1373 real(dp) :: work(p+1)
1374 ! Call corresponding work routine
1375 call fmm_m2p_adj_work(c, src_q, dst_r, p, vscales_rel, beta, dst_m, work)
1376end subroutine fmm_m2p_adj
1377
1404subroutine fmm_m2p_adj_work(c, src_q, dst_r, p, vscales_rel, beta, dst_m, work)
1405 ! Inputs
1406 integer, intent(in) :: p
1407 real(dp), intent(in) :: c(3), src_q, dst_r, vscales_rel((p+1)*(p+1)), beta
1408 ! Output
1409 real(dp), intent(inout) :: dst_m((p+1)**2)
1410 ! Workspace
1411 real(dp), intent(out), target :: work(p+1)
1412 ! Local variables
1413 real(dp) :: rho, ctheta, stheta, cphi, sphi, rcoef, t, tmp, tmp1, tmp2, &
1414 & max12, ssq12, pl2m, pl1m, plm, pmm, cmphi, smphi, ylm
1415 integer :: l, m, indl
1416 ! In case src_q is zero just scale output properly
1417 if (src_q .eq. zero) then
1418 ! Zero init output if beta is also zero
1419 if (beta .eq. zero) then
1420 dst_m = zero
1421 ! Scale output by beta otherwise
1422 else
1423 dst_m = beta * dst_m
1424 end if
1425 ! Exit subroutine
1426 return
1427 end if
1428 ! Now src_q is non-zero
1429 ! Get spherical coordinates
1430 if (c(1) .eq. zero) then
1431 max12 = abs(c(2))
1432 ssq12 = one
1433 else if (abs(c(2)) .gt. abs(c(1))) then
1434 max12 = abs(c(2))
1435 ssq12 = one + (c(1)/c(2))**2
1436 else
1437 max12 = abs(c(1))
1438 ssq12 = one + (c(2)/c(1))**2
1439 end if
1440 ! Then we compute rho
1441 if (c(3) .eq. zero) then
1442 rho = max12 * sqrt(ssq12)
1443 else if (abs(c(3)) .gt. max12) then
1444 rho = one + ssq12 *(max12/c(3))**2
1445 rho = abs(c(3)) * sqrt(rho)
1446 else
1447 rho = ssq12 + (c(3)/max12)**2
1448 rho = max12 * sqrt(rho)
1449 end if
1450 ! In case of a singularity (rho=zero) induced potential is infinite and is
1451 ! not taken into account.
1452 if (rho .eq. zero) then
1453 return
1454 end if
1455 ! Compute actual induced potentials
1456 rcoef = dst_r / rho
1457 ! Length of a vector x(1:2)
1458 stheta = max12 * sqrt(ssq12)
1459 ! Case x(1:2) != 0
1460 if (stheta .ne. zero) then
1461 ! Normalize cphi and sphi
1462 cphi = c(1) / stheta
1463 sphi = c(2) / stheta
1464 ! Normalize ctheta and stheta
1465 ctheta = c(3) / rho
1466 stheta = stheta / rho
1467 ! Treat easy cases
1468 t = src_q * rcoef
1469 select case(p)
1470 case (0)
1471 if (beta .eq. zero) then
1472 dst_m(1) = t * vscales_rel(1)
1473 else
1474 dst_m(1) = beta*dst_m(1) + t*vscales_rel(1)
1475 end if
1476 return
1477 case (1)
1478 if (beta .eq. zero) then
1479 ! l = 0
1480 dst_m(1) = t * vscales_rel(1)
1481 ! l = 1
1482 t = t * rcoef
1483 tmp = -vscales_rel(4) * stheta
1484 tmp2 = t * tmp
1485 dst_m(2) = tmp2 * sphi
1486 dst_m(3) = t * vscales_rel(3) * ctheta
1487 dst_m(4) = tmp2 * cphi
1488 else
1489 ! l = 0
1490 dst_m(1) = beta*dst_m(1) + t*vscales_rel(1)
1491 ! l = 1
1492 t = t * rcoef
1493 tmp = -vscales_rel(4) * stheta
1494 tmp2 = t * tmp
1495 dst_m(2) = beta*dst_m(2) + tmp2*sphi
1496 dst_m(3) = beta*dst_m(3) + t*vscales_rel(3)*ctheta
1497 dst_m(4) = beta*dst_m(4) + tmp2*cphi
1498 end if
1499 return
1500 end select
1501 ! Now p>1
1502 ! Precompute src_q*rcoef^{l+1}
1503 work(1) = src_q * rcoef
1504 do l = 1, p
1505 work(l+1) = rcoef * work(l)
1506 end do
1507 ! Overwrite output
1508 if (beta .eq. zero) then
1509 ! Case m = 0
1510 ! P_{l-2}^m which is P_0^0 now
1511 pl2m = one
1512 dst_m(1) = work(1) * vscales_rel(1)
1513 ! Update P_m^m for the next iteration
1514 pmm = -stheta
1515 ! P_{l-1}^m which is P_{m+1}^m now
1516 pl1m = ctheta
1517 ylm = pl1m * vscales_rel(3)
1518 dst_m(3) = work(2) * ylm
1519 ! P_l^m for l>m+1
1520 do l = 2, p
1521 plm = dble(2*l-1)*ctheta*pl1m - dble(l-1)*pl2m
1522 plm = plm / dble(l)
1523 ylm = plm * vscales_rel(l*l+l+1)
1524 dst_m(l*l+l+1) = work(l+1) * ylm
1525 ! Update P_{l-2}^m and P_{l-1}^m for the next iteration
1526 pl2m = pl1m
1527 pl1m = plm
1528 end do
1529 ! Prepare cos(m*phi) and sin(m*phi) for m=1
1530 cmphi = cphi
1531 smphi = sphi
1532 ! Case 0<m<p
1533 do m = 1, p-1
1534 ! P_{l-2}^m which is P_m^m now
1535 pl2m = pmm
1536 ylm = pmm * vscales_rel((m+1)**2)
1537 ylm = work(m+1) * ylm
1538 dst_m((m+1)**2) = cmphi * ylm
1539 dst_m(m*m+1) = smphi * ylm
1540 ! Temporary to reduce number of operations
1541 tmp1 = dble(2*m+1) * pmm
1542 ! Update P_m^m for the next iteration
1543 pmm = -stheta * tmp1
1544 ! P_{l-1}^m which is P_{m+1}^m now
1545 pl1m = ctheta * tmp1
1546 ylm = pl1m * vscales_rel((m+1)*(m+3))
1547 ylm = work(m+2) * ylm
1548 dst_m((m+1)*(m+3)) = cmphi * ylm
1549 dst_m((m+1)*(m+2)+1-m) = smphi * ylm
1550 ! P_l^m for l>m+1
1551 do l = m+2, p
1552 plm = dble(2*l-1)*ctheta*pl1m - dble(l+m-1)*pl2m
1553 plm = plm / dble(l-m)
1554 ylm = plm * vscales_rel(l*l+l+1+m)
1555 ylm = work(l+1) * ylm
1556 dst_m(l*l+l+1+m) = cmphi * ylm
1557 dst_m(l*l+l+1-m) = smphi * ylm
1558 ! Update P_{l-2}^m and P_{l-1}^m for the next iteration
1559 pl2m = pl1m
1560 pl1m = plm
1561 end do
1562 ! Update cos(m*phi) and sin(m*phi) for the next iteration
1563 tmp1 = cmphi
1564 cmphi = cmphi*cphi - smphi*sphi
1565 smphi = tmp1*sphi + smphi*cphi
1566 end do
1567 ! Case m=p requires only to use P_m^m
1568 ylm = pmm * vscales_rel((p+1)**2)
1569 ylm = work(p+1) * ylm
1570 dst_m((p+1)**2) = cmphi * ylm
1571 dst_m(p*p+1) = smphi * ylm
1572 ! Update output
1573 else
1574 ! Case m = 0
1575 ! P_{l-2}^m which is P_0^0 now
1576 pl2m = one
1577 dst_m(1) = beta*dst_m(1) + work(1)*vscales_rel(1)
1578 ! Update P_m^m for the next iteration
1579 pmm = -stheta
1580 ! P_{l-1}^m which is P_{m+1}^m now
1581 pl1m = ctheta
1582 ylm = pl1m * vscales_rel(3)
1583 dst_m(3) = beta*dst_m(3) + work(2)*ylm
1584 ! P_l^m for l>m+1
1585 do l = 2, p
1586 plm = dble(2*l-1)*ctheta*pl1m - dble(l-1)*pl2m
1587 plm = plm / dble(l)
1588 ylm = plm * vscales_rel(l*l+l+1)
1589 dst_m(l*l+l+1) = beta*dst_m(l*l+l+1) + work(l+1)*ylm
1590 ! Update P_{l-2}^m and P_{l-1}^m for the next iteration
1591 pl2m = pl1m
1592 pl1m = plm
1593 end do
1594 ! Prepare cos(m*phi) and sin(m*phi) for m=1
1595 cmphi = cphi
1596 smphi = sphi
1597 ! Case 0<m<p
1598 do m = 1, p-1
1599 ! P_{l-2}^m which is P_m^m now
1600 pl2m = pmm
1601 ylm = pmm * vscales_rel((m+1)**2)
1602 ylm = work(m+1) * ylm
1603 dst_m((m+1)**2) = beta*dst_m((m+1)**2) + cmphi*ylm
1604 dst_m(m*m+1) = beta*dst_m(m*m+1) + smphi*ylm
1605 ! Temporary to reduce number of operations
1606 tmp1 = dble(2*m+1) * pmm
1607 ! Update P_m^m for the next iteration
1608 pmm = -stheta * tmp1
1609 ! P_{l-1}^m which is P_{m+1}^m now
1610 pl1m = ctheta * tmp1
1611 ylm = pl1m * vscales_rel((m+1)*(m+3))
1612 ylm = work(m+2) * ylm
1613 dst_m((m+1)*(m+3)) = beta*dst_m((m+1)*(m+3)) + cmphi*ylm
1614 dst_m((m+1)*(m+2)+1-m) = beta*dst_m((m+1)*(m+2)+1-m) + &
1615 & smphi*ylm
1616 ! P_l^m for l>m+1
1617 do l = m+2, p
1618 plm = dble(2*l-1)*ctheta*pl1m - dble(l+m-1)*pl2m
1619 plm = plm / dble(l-m)
1620 ylm = plm * vscales_rel(l*l+l+1+m)
1621 ylm = work(l+1) * ylm
1622 dst_m(l*l+l+1+m) = beta*dst_m(l*l+l+1+m) + cmphi*ylm
1623 dst_m(l*l+l+1-m) = beta*dst_m(l*l+l+1-m) + smphi*ylm
1624 ! Update P_{l-2}^m and P_{l-1}^m for the next iteration
1625 pl2m = pl1m
1626 pl1m = plm
1627 end do
1628 ! Update cos(m*phi) and sin(m*phi) for the next iteration
1629 tmp1 = cmphi
1630 cmphi = cmphi*cphi - smphi*sphi
1631 smphi = tmp1*sphi + smphi*cphi
1632 end do
1633 ! Case m=p requires only to use P_m^m
1634 ylm = pmm * vscales_rel((p+1)**2)
1635 ylm = work(p+1) * ylm
1636 dst_m((p+1)**2) = beta*dst_m((p+1)**2) + cmphi*ylm
1637 dst_m(p*p+1) = beta*dst_m(p*p+1) + smphi*ylm
1638 end if
1639 ! Case of x(1:2) = 0 and x(3) != 0
1640 else
1641 ! In this case Y_l^m = 0 for m != 0, so only case m = 0 is taken into
1642 ! account. Y_l^0 = ctheta^l in this case where ctheta is either +1 or
1643 ! -1. So, we copy sign(ctheta) into rcoef. But before that we
1644 ! initialize alpha/r factor for l=0 case without taking possible sign
1645 ! into account.
1646 t = src_q * rcoef
1647 if (c(3) .lt. zero) then
1648 rcoef = -rcoef
1649 end if
1650 ! Proceed with accumulation of a potential
1651 indl = 1
1652 if (beta .eq. zero) then
1653 do l = 0, p
1654 ! Index of Y_l^0
1655 indl = indl + 2*l
1656 ! Add 4*pi/(2*l+1)*rcoef^{l+1}*Y_l^0 contribution
1657 dst_m(indl) = t * vscales_rel(indl)
1658 dst_m(indl-l:indl-1) = zero
1659 dst_m(indl+1:indl+l) = zero
1660 ! Update t
1661 t = t * rcoef
1662 end do
1663 else
1664 do l = 0, p
1665 ! Index of Y_l^0
1666 indl = indl + 2*l
1667 ! Add 4*pi/(2*l+1)*rcoef^{l+1}*Y_l^0 contribution
1668 dst_m(indl) = beta*dst_m(indl) + t*vscales_rel(indl)
1669 dst_m(indl-l:indl-1) = beta * dst_m(indl-l:indl-1)
1670 dst_m(indl+1:indl+l) = beta * dst_m(indl+1:indl+l)
1671 ! Update t
1672 t = t * rcoef
1673 end do
1674 end if
1675 end if
1676end subroutine fmm_m2p_adj_work
1677
1703subroutine fmm_m2p_bessel_adj(c, src_q, dst_r, kappa, p, vscales, beta, dst_m)
1704 ! Inputs
1705 integer, intent(in) :: p
1706 real(dp), intent(in) :: c(3), src_q, dst_r, vscales((p+1)*(p+1)), beta, &
1707 & kappa
1708 ! Output
1709 real(dp), intent(inout) :: dst_m((p+1)**2)
1710 ! Workspace
1711 complex(dp) :: work_complex(max(2, p+1))
1712 real(dp) :: work(p+1)
1713 ! Local variables
1714 real(dp) :: dst_sk(p+1), ck(3)
1715 ! Call corresponding work routine
1716 call modified_spherical_bessel_second_kind(p, kappa*dst_r, dst_sk, work, &
1717 & work_complex)
1718 ck = kappa*c
1719 call fmm_m2p_bessel_adj_work(ck, src_q, dst_sk, p, vscales, beta, &
1720 & dst_m, work_complex, work)
1721end subroutine fmm_m2p_bessel_adj
1722
1749subroutine fmm_m2p_bessel_adj_work(c, src_q, dst_sk, p, vscales, beta, dst_m, &
1750 & work_complex, work)
1751 use complex_bessel
1752 ! Inputs
1753 integer, intent(in) :: p
1754 real(dp), intent(in) :: c(3), src_q, dst_sk(p+1), vscales((p+1)*(p+1)), beta
1755 ! Output
1756 real(dp), intent(inout) :: dst_m((p+1)**2)
1757 ! Workspace
1758 complex(dp), intent(out) :: work_complex(p+1)
1759 real(dp), intent(out) :: work(p+1)
1760 ! Local variables
1761 real(dp) :: rho, ctheta, stheta, cphi, sphi, t, tmp, tmp1, tmp2, &
1762 & max12, ssq12, pl2m, pl1m, plm, pmm, cmphi, smphi, ylm, &
1763 & scaling_factor
1764 integer :: l, m, indl, NZ, ierr
1765 complex(dp) :: complex_argument
1766 ! In case src_q is zero just scale output properly
1767 if (src_q .eq. zero) then
1768 ! Zero init output if beta is also zero
1769 if (beta .eq. zero) then
1770 dst_m = zero
1771 ! Scale output by beta otherwise
1772 else
1773 dst_m = beta * dst_m
1774 end if
1775 ! Exit subroutine
1776 return
1777 end if
1778 ! Now src_q is non-zero
1779 ! Get spherical coordinates
1780 if (c(1) .eq. zero) then
1781 max12 = abs(c(2))
1782 ssq12 = one
1783 else if (abs(c(2)) .gt. abs(c(1))) then
1784 max12 = abs(c(2))
1785 ssq12 = one + (c(1)/c(2))**2
1786 else
1787 max12 = abs(c(1))
1788 ssq12 = one + (c(2)/c(1))**2
1789 end if
1790 ! Then we compute rho
1791 if (c(3) .eq. zero) then
1792 rho = max12 * sqrt(ssq12)
1793 else if (abs(c(3)) .gt. max12) then
1794 rho = one + ssq12 *(max12/c(3))**2
1795 rho = abs(c(3)) * sqrt(rho)
1796 else
1797 rho = ssq12 + (c(3)/max12)**2
1798 rho = max12 * sqrt(rho)
1799 end if
1800 ! In case of a singularity (rho=zero) induced potential is infinite and is
1801 ! not taken into account.
1802 if (rho .eq. zero) then
1803 return
1804 end if
1805 ! Compute Bessel function
1806 scaling_factor = src_q * sqrt(two / (pi*rho))
1807 ! NOTE: Complex argument is required to call I_J(x)
1808 complex_argument = rho
1809 ! Compute for l = 0
1810 call cbesk(complex_argument, pt5, 1, 1, work_complex(1), nz, ierr)
1811 ! Compute for l = 1,...,p
1812 if (p .gt. 0) then
1813 call cbesk(complex_argument, 1.5d0, 1, p, work_complex(2:p+1), nz, ierr)
1814 end if
1815 work = scaling_factor * real(work_complex) / dst_sk
1816 ! Length of a vector x(1:2)
1817 stheta = max12 * sqrt(ssq12)
1818 ! Case x(1:2) != 0
1819 if (stheta .ne. zero) then
1820 ! Normalize cphi and sphi
1821 cphi = c(1) / stheta
1822 sphi = c(2) / stheta
1823 ! Normalize ctheta and stheta
1824 ctheta = c(3) / rho
1825 stheta = stheta / rho
1826 ! Treat easy cases
1827 select case(p)
1828 case (0)
1829 if (beta .eq. zero) then
1830 dst_m(1) = work(1) * vscales(1)
1831 else
1832 dst_m(1) = beta*dst_m(1) + work(1)*vscales(1)
1833 end if
1834 return
1835 case (1)
1836 if (beta .eq. zero) then
1837 ! l = 0
1838 dst_m(1) = work(1) * vscales(1)
1839 ! l = 1
1840 tmp = -vscales(4) * stheta
1841 tmp2 = work(2) * tmp
1842 dst_m(2) = tmp2 * sphi
1843 dst_m(3) = work(2) * vscales(3) * ctheta
1844 dst_m(4) = tmp2 * cphi
1845 else
1846 ! l = 0
1847 dst_m(1) = beta*dst_m(1) + work(1)*vscales(1)
1848 ! l = 1
1849 tmp = -vscales(4) * stheta
1850 tmp2 = work(2) * tmp
1851 dst_m(2) = beta*dst_m(2) + tmp2*sphi
1852 dst_m(3) = beta*dst_m(3) + work(2)*vscales(3)*ctheta
1853 dst_m(4) = beta*dst_m(4) + tmp2*cphi
1854 end if
1855 return
1856 end select
1857 ! Now p>1
1858 ! Overwrite output
1859 if (beta .eq. zero) then
1860 ! Case m = 0
1861 ! P_{l-2}^m which is P_0^0 now
1862 pl2m = one
1863 dst_m(1) = work(1) * vscales(1)
1864 ! Update P_m^m for the next iteration
1865 pmm = -stheta
1866 ! P_{l-1}^m which is P_{m+1}^m now
1867 pl1m = ctheta
1868 ylm = pl1m * vscales(3)
1869 dst_m(3) = work(2) * ylm
1870 ! P_l^m for l>m+1
1871 do l = 2, p
1872 plm = dble(2*l-1)*ctheta*pl1m - dble(l-1)*pl2m
1873 plm = plm / dble(l)
1874 ylm = plm * vscales(l*l+l+1)
1875 dst_m(l*l+l+1) = work(l+1) * ylm
1876 ! Update P_{l-2}^m and P_{l-1}^m for the next iteration
1877 pl2m = pl1m
1878 pl1m = plm
1879 end do
1880 ! Prepare cos(m*phi) and sin(m*phi) for m=1
1881 cmphi = cphi
1882 smphi = sphi
1883 ! Case 0<m<p
1884 do m = 1, p-1
1885 ! P_{l-2}^m which is P_m^m now
1886 pl2m = pmm
1887 ylm = pmm * vscales((m+1)**2)
1888 ylm = work(m+1) * ylm
1889 dst_m((m+1)**2) = cmphi * ylm
1890 dst_m(m*m+1) = smphi * ylm
1891 ! Temporary to reduce number of operations
1892 tmp1 = dble(2*m+1) * pmm
1893 ! Update P_m^m for the next iteration
1894 pmm = -stheta * tmp1
1895 ! P_{l-1}^m which is P_{m+1}^m now
1896 pl1m = ctheta * tmp1
1897 ylm = pl1m * vscales((m+1)*(m+3))
1898 ylm = work(m+2) * ylm
1899 dst_m((m+1)*(m+3)) = cmphi * ylm
1900 dst_m((m+1)*(m+2)+1-m) = smphi * ylm
1901 ! P_l^m for l>m+1
1902 do l = m+2, p
1903 plm = dble(2*l-1)*ctheta*pl1m - dble(l+m-1)*pl2m
1904 plm = plm / dble(l-m)
1905 ylm = plm * vscales(l*l+l+1+m)
1906 ylm = work(l+1) * ylm
1907 dst_m(l*l+l+1+m) = cmphi * ylm
1908 dst_m(l*l+l+1-m) = smphi * ylm
1909 ! Update P_{l-2}^m and P_{l-1}^m for the next iteration
1910 pl2m = pl1m
1911 pl1m = plm
1912 end do
1913 ! Update cos(m*phi) and sin(m*phi) for the next iteration
1914 tmp1 = cmphi
1915 cmphi = cmphi*cphi - smphi*sphi
1916 smphi = tmp1*sphi + smphi*cphi
1917 end do
1918 ! Case m=p requires only to use P_m^m
1919 ylm = pmm * vscales((p+1)**2)
1920 ylm = work(p+1) * ylm
1921 dst_m((p+1)**2) = cmphi * ylm
1922 dst_m(p*p+1) = smphi * ylm
1923 ! Update output
1924 else
1925 ! Case m = 0
1926 ! P_{l-2}^m which is P_0^0 now
1927 pl2m = one
1928 dst_m(1) = beta*dst_m(1) + work(1)*vscales(1)
1929 ! Update P_m^m for the next iteration
1930 pmm = -stheta
1931 ! P_{l-1}^m which is P_{m+1}^m now
1932 pl1m = ctheta
1933 ylm = pl1m * vscales(3)
1934 dst_m(3) = beta*dst_m(3) + work(2)*ylm
1935 ! P_l^m for l>m+1
1936 do l = 2, p
1937 plm = dble(2*l-1)*ctheta*pl1m - dble(l-1)*pl2m
1938 plm = plm / dble(l)
1939 ylm = plm * vscales(l*l+l+1)
1940 dst_m(l*l+l+1) = beta*dst_m(l*l+l+1) + work(l+1)*ylm
1941 ! Update P_{l-2}^m and P_{l-1}^m for the next iteration
1942 pl2m = pl1m
1943 pl1m = plm
1944 end do
1945 ! Prepare cos(m*phi) and sin(m*phi) for m=1
1946 cmphi = cphi
1947 smphi = sphi
1948 ! Case 0<m<p
1949 do m = 1, p-1
1950 ! P_{l-2}^m which is P_m^m now
1951 pl2m = pmm
1952 ylm = pmm * vscales((m+1)**2)
1953 ylm = work(m+1) * ylm
1954 dst_m((m+1)**2) = beta*dst_m((m+1)**2) + cmphi*ylm
1955 dst_m(m*m+1) = beta*dst_m(m*m+1) + smphi*ylm
1956 ! Temporary to reduce number of operations
1957 tmp1 = dble(2*m+1) * pmm
1958 ! Update P_m^m for the next iteration
1959 pmm = -stheta * tmp1
1960 ! P_{l-1}^m which is P_{m+1}^m now
1961 pl1m = ctheta * tmp1
1962 ylm = pl1m * vscales((m+1)*(m+3))
1963 ylm = work(m+2) * ylm
1964 dst_m((m+1)*(m+3)) = beta*dst_m((m+1)*(m+3)) + cmphi*ylm
1965 dst_m((m+1)*(m+2)+1-m) = beta*dst_m((m+1)*(m+2)+1-m) + &
1966 & smphi*ylm
1967 ! P_l^m for l>m+1
1968 do l = m+2, p
1969 plm = dble(2*l-1)*ctheta*pl1m - dble(l+m-1)*pl2m
1970 plm = plm / dble(l-m)
1971 ylm = plm * vscales(l*l+l+1+m)
1972 ylm = work(l+1) * ylm
1973 dst_m(l*l+l+1+m) = beta*dst_m(l*l+l+1+m) + cmphi*ylm
1974 dst_m(l*l+l+1-m) = beta*dst_m(l*l+l+1-m) + smphi*ylm
1975 ! Update P_{l-2}^m and P_{l-1}^m for the next iteration
1976 pl2m = pl1m
1977 pl1m = plm
1978 end do
1979 ! Update cos(m*phi) and sin(m*phi) for the next iteration
1980 tmp1 = cmphi
1981 cmphi = cmphi*cphi - smphi*sphi
1982 smphi = tmp1*sphi + smphi*cphi
1983 end do
1984 ! Case m=p requires only to use P_m^m
1985 ylm = pmm * vscales((p+1)**2)
1986 ylm = work(p+1) * ylm
1987 dst_m((p+1)**2) = beta*dst_m((p+1)**2) + cmphi*ylm
1988 dst_m(p*p+1) = beta*dst_m(p*p+1) + smphi*ylm
1989 end if
1990 ! Case of x(1:2) = 0 and x(3) != 0
1991 else
1992 ! In this case Y_l^m = 0 for m != 0, so only case m = 0 is taken into
1993 ! account. Y_l^0 = ctheta^l in this case where ctheta is either +1 or
1994 ! -1. So, we copy sign(ctheta) into rcoef. But before that we
1995 ! initialize alpha/r factor for l=0 case without taking possible sign
1996 ! into account.
1997 t = one
1998 if (c(3) .lt. zero) then
1999 ! Proceed with accumulation of a potential
2000 indl = 1
2001 if (beta .eq. zero) then
2002 do l = 0, p
2003 ! Index of Y_l^0
2004 indl = indl + 2*l
2005 ! Add 4*pi/(2*l+1)*rcoef^{l+1}*Y_l^0 contribution
2006 dst_m(indl) = t * work(l+1) * vscales(indl)
2007 dst_m(indl-l:indl-1) = zero
2008 dst_m(indl+1:indl+l) = zero
2009 ! Update t
2010 t = -t
2011 end do
2012 else
2013 do l = 0, p
2014 ! Index of Y_l^0
2015 indl = indl + 2*l
2016 ! Add 4*pi/(2*l+1)*rcoef^{l+1}*Y_l^0 contribution
2017 dst_m(indl) = beta*dst_m(indl) + t*work(l+1)*vscales(indl)
2018 dst_m(indl-l:indl-1) = beta * dst_m(indl-l:indl-1)
2019 dst_m(indl+1:indl+l) = beta * dst_m(indl+1:indl+l)
2020 ! Update t
2021 t = -t
2022 end do
2023 end if
2024 else
2025 ! Proceed with accumulation of a potential
2026 indl = 1
2027 if (beta .eq. zero) then
2028 do l = 0, p
2029 ! Index of Y_l^0
2030 indl = indl + 2*l
2031 ! Add 4*pi/(2*l+1)*rcoef^{l+1}*Y_l^0 contribution
2032 dst_m(indl) = work(l+1) * vscales(indl)
2033 dst_m(indl-l:indl-1) = zero
2034 dst_m(indl+1:indl+l) = zero
2035 end do
2036 else
2037 do l = 0, p
2038 ! Index of Y_l^0
2039 indl = indl + 2*l
2040 ! Add 4*pi/(2*l+1)*rcoef^{l+1}*Y_l^0 contribution
2041 dst_m(indl) = beta*dst_m(indl) + work(l+1)*vscales(indl)
2042 dst_m(indl-l:indl-1) = beta * dst_m(indl-l:indl-1)
2043 dst_m(indl+1:indl+l) = beta * dst_m(indl+1:indl+l)
2044 end do
2045 end if
2046 end if
2047 end if
2048end subroutine fmm_m2p_bessel_adj_work
2049
2073subroutine fmm_m2p_mat(c, r, p, vscales, mat)
2074 ! Inputs
2075 integer, intent(in) :: p
2076 real(dp), intent(in) :: c(3), r, vscales((p+1)**2)
2077 ! Output
2078 real(dp), intent(out) :: mat((p+1)**2)
2079 ! Local variables
2080 real(dp) :: rho, ctheta, stheta, cphi, sphi, vcos(p+1), vsin(p+1)
2081 real(dp) :: vylm((p+1)**2), vplm((p+1)**2), rcoef, t
2082 integer :: n, ind
2083 ! Get radius and values of spherical harmonics
2084 call ylmbas(c, rho, ctheta, stheta, cphi, sphi, p, vscales, vylm, vplm, &
2085 & vcos, vsin)
2086 ! In case of a singularity (rho=zero) induced potential is infinite and is
2087 ! not taken into account.
2088 if (rho .eq. zero) then
2089 return
2090 end if
2091 ! Compute actual induced potentials
2092 rcoef = r / rho
2093 t = one
2094 do n = 0, p
2095 t = t * rcoef
2096 ind = n*n + n + 1
2097 mat(ind-n:ind+n) = t / vscales(ind)**2 * vylm(ind-n:ind+n)
2098 end do
2099end subroutine fmm_m2p_mat
2100
2126subroutine fmm_l2p(c, src_r, p, vscales, alpha, src_l, beta, dst_v)
2127 ! Inputs
2128 integer, intent(in) :: p
2129 real(dp), intent(in) :: c(3), src_r, vscales((p+1)*(p+1)), alpha, &
2130 & src_l((p+1)*(p+1)), beta
2131 ! Output
2132 real(dp), intent(inout) :: dst_v
2133 ! Workspace
2134 real(dp) :: work(p+1)
2135 ! Call corresponding work routine
2136 call fmm_l2p_work(c, src_r, p, vscales, alpha, src_l, beta, dst_v, work)
2137end subroutine fmm_l2p
2138
2165subroutine fmm_l2p_work(c, src_r, p, vscales_rel, alpha, src_l, beta, dst_v, &
2166 & work)
2167 ! Inputs
2168 integer, intent(in) :: p
2169 real(dp), intent(in) :: c(3), src_r, vscales_rel((p+1)*(p+1)), alpha, &
2170 & src_l((p+1)*(p+1)), beta
2171 ! Output
2172 real(dp), intent(inout) :: dst_v
2173 ! Workspace
2174 real(dp), intent(out), target :: work(p+1)
2175 ! Local variables
2176 real(dp) :: rho, ctheta, stheta, cphi, sphi, rcoef, t, tmp, tmp1, tmp2, &
2177 & tmp3, max12, ssq12, pl2m, pl1m, plm, pmm, cmphi, smphi, ylm
2178 integer :: l, m, indl
2179 ! Scale output
2180 if (beta .eq. zero) then
2181 dst_v = zero
2182 else
2183 dst_v = beta * dst_v
2184 end if
2185 ! In case of zero alpha nothing else is required no matter what is the
2186 ! value of the induced potential
2187 if (alpha .eq. zero) then
2188 return
2189 end if
2190 ! Get spherical coordinates
2191 if (c(1) .eq. zero) then
2192 max12 = abs(c(2))
2193 ssq12 = one
2194 else if (abs(c(2)) .gt. abs(c(1))) then
2195 max12 = abs(c(2))
2196 ssq12 = one + (c(1)/c(2))**2
2197 else
2198 max12 = abs(c(1))
2199 ssq12 = one + (c(2)/c(1))**2
2200 end if
2201 ! Then we compute rho
2202 if (c(3) .eq. zero) then
2203 rho = max12 * sqrt(ssq12)
2204 else if (abs(c(3)) .gt. max12) then
2205 rho = one + ssq12 *(max12/c(3))**2
2206 rho = abs(c(3)) * sqrt(rho)
2207 else
2208 rho = ssq12 + (c(3)/max12)**2
2209 rho = max12 * sqrt(rho)
2210 end if
2211 ! In case of a singularity (rho=zero) induced potential depends only on the
2212 ! leading 1-st spherical harmonic
2213 if (rho .eq. zero) then
2214 dst_v = dst_v + alpha*src_l(1)*vscales_rel(1)
2215 return
2216 end if
2217 ! Compute the actual induced potential
2218 rcoef = rho / src_r
2219 ! Length of a vector x(1:2)
2220 stheta = max12 * sqrt(ssq12)
2221 ! Case x(1:2) != 0
2222 if (stheta .ne. zero) then
2223 ! Normalize cphi and sphi
2224 cphi = c(1) / stheta
2225 sphi = c(2) / stheta
2226 ! Normalize ctheta and stheta
2227 ctheta = c(3) / rho
2228 stheta = stheta / rho
2229 ! Treat easy cases
2230 select case(p)
2231 case (0)
2232 ! l = 0
2233 dst_v = dst_v + alpha*vscales_rel(1)*src_l(1)
2234 return
2235 case (1)
2236 ! l = 0
2237 tmp = src_l(1) * vscales_rel(1)
2238 ! l = 1
2239 tmp2 = ctheta * src_l(3) * vscales_rel(3)
2240 tmp3 = vscales_rel(4) * stheta
2241 tmp2 = tmp2 - tmp3*sphi*src_l(2)
2242 tmp2 = tmp2 - tmp3*cphi*src_l(4)
2243 dst_v = dst_v + alpha*(tmp+rcoef*tmp2)
2244 return
2245 end select
2246 ! Now p>1
2247 ! Precompute alpha*rcoef^l
2248 work(1) = alpha
2249 do l = 1, p
2250 work(l+1) = rcoef * work(l)
2251 end do
2252 ! Case m = 0
2253 ! P_{l-2}^m which is P_0^0 now
2254 pl2m = one
2255 dst_v = dst_v + work(1)*src_l(1)*vscales_rel(1)
2256 ! Update P_m^m for the next iteration
2257 pmm = -stheta
2258 ! P_{l-1}^m which is P_{m+1}^m now
2259 pl1m = ctheta
2260 ylm = pl1m * vscales_rel(3)
2261 dst_v = dst_v + work(2)*src_l(3)*ylm
2262 ! P_l^m for l>m+1
2263 do l = 2, p
2264 plm = dble(2*l-1)*ctheta*pl1m - dble(l-1)*pl2m
2265 plm = plm / dble(l)
2266 ylm = plm * vscales_rel(l*l+l+1)
2267 dst_v = dst_v + work(l+1)*src_l(l*l+l+1)*ylm
2268 ! Update P_{l-2}^m and P_{l-1}^m for the next iteration
2269 pl2m = pl1m
2270 pl1m = plm
2271 end do
2272 ! Prepare cos(m*phi) and sin(m*phi) for m=1
2273 cmphi = cphi
2274 smphi = sphi
2275 ! Case 0<m<p
2276 do m = 1, p-1
2277 ! P_{l-2}^m which is P_m^m now
2278 pl2m = pmm
2279 ylm = pmm * vscales_rel((m+1)**2)
2280 tmp1 = cmphi*src_l((m+1)**2) + smphi*src_l(m*m+1)
2281 dst_v = dst_v + work(m+1)*ylm*tmp1
2282 ! Temporary to reduce number of operations
2283 tmp1 = dble(2*m+1) * pmm
2284 ! Update P_m^m for the next iteration
2285 pmm = -stheta * tmp1
2286 ! P_{l-1}^m which is P_{m+1}^m now
2287 pl1m = ctheta * tmp1
2288 ylm = pl1m * vscales_rel((m+1)*(m+3))
2289 tmp1 = cmphi*src_l((m+1)*(m+3)) + smphi*src_l((m+1)*(m+2)+1-m)
2290 dst_v = dst_v + work(m+2)*ylm*tmp1
2291 ! P_l^m for l>m+1
2292 do l = m+2, p
2293 plm = dble(2*l-1)*ctheta*pl1m - dble(l+m-1)*pl2m
2294 plm = plm / dble(l-m)
2295 ylm = plm * vscales_rel(l*l+l+1+m)
2296 tmp1 = cmphi*src_l(l*l+l+1+m) + smphi*src_l(l*l+l+1-m)
2297 dst_v = dst_v + work(l+1)*ylm*tmp1
2298 ! Update P_{l-2}^m and P_{l-1}^m for the next iteration
2299 pl2m = pl1m
2300 pl1m = plm
2301 end do
2302 ! Update cos(m*phi) and sin(m*phi) for the next iteration
2303 tmp1 = cmphi
2304 cmphi = cmphi*cphi - smphi*sphi
2305 smphi = tmp1*sphi + smphi*cphi
2306 end do
2307 ! Case m=p requires only to use P_m^m
2308 ylm = pmm * vscales_rel((p+1)**2)
2309 tmp1 = cmphi*src_l((p+1)**2) + smphi*src_l(p*p+1)
2310 dst_v = dst_v + work(p+1)*ylm*tmp1
2311 ! Case of x(1:2) = 0 and x(3) != 0
2312 else
2313 ! In this case Y_l^m = 0 for m != 0, so only case m = 0 is taken into
2314 ! account. Y_l^m = ctheta^m in this case where ctheta is either +1 or
2315 ! -1. So, we copy sign(ctheta) into rcoef.
2316 rcoef = sign(rcoef, c(3))
2317 ! Init t and proceed with accumulation of a potential
2318 t = alpha
2319 indl = 1
2320 do l = 0, p
2321 ! Index of Y_l^0
2322 indl = indl + 2*l
2323 ! Add 4*pi/(2*l+1)*rcoef^{l+1}*Y_l^0 contribution
2324 dst_v = dst_v + t*src_l(indl)*vscales_rel(indl)
2325 ! Update t
2326 t = t * rcoef
2327 end do
2328 end if
2329end subroutine fmm_l2p_work
2330
2356subroutine fmm_l2p_bessel(c, src_r, p, vscales, alpha, src_l, beta, dst_v)
2357 ! Inputs
2358 integer, intent(in) :: p
2359 real(dp), intent(in) :: c(3), src_r, vscales((p+1)*(p+1)), alpha, &
2360 & src_l((p+1)*(p+1)), beta
2361 ! Output
2362 real(dp), intent(inout) :: dst_v
2363 ! Temporary workspace
2364 complex(dp) :: work_complex(max(2, p+1))
2365 real(dp) :: work(p+1)
2366 ! Local variables
2367 real(dp) :: src_si(p+1)
2368 ! Call corresponding work routine
2369 call modified_spherical_bessel_first_kind(p, src_r, src_si, work, &
2370 & work_complex)
2371 call fmm_l2p_bessel_work(c, p, vscales, src_si, alpha, src_l, beta, &
2372 & dst_v, work_complex, work)
2373end subroutine fmm_l2p_bessel
2374
2401subroutine fmm_l2p_bessel_work(c, p, vscales, SI_ri, alpha, src_l, beta, &
2402 & dst_v, work_complex, work)
2403 use complex_bessel
2404 !! Inputs
2405 integer, intent(in) :: p
2406 real(dp), intent(in) :: c(3), vscales((p+1)*(p+1)), alpha, &
2407 & src_l((p+1)*(p+1)), beta, SI_ri(p+1)
2408 !! Output
2409 real(dp), intent(inout) :: dst_v
2410 !! Workspace
2411 complex(dp), intent(out) :: work_complex(p+1)
2412 real(dp), intent(out) :: work(p+1)
2413 !! Local variables
2414 real(dp) :: rho, ctheta, stheta, cphi, sphi, t, tmp, tmp1, tmp2, &
2415 & tmp3, max12, ssq12, pl2m, pl1m, plm, pmm, cmphi, smphi, ylm
2416 real(dp) :: scaling_factor
2417 complex(dp) :: complex_argument
2418 integer :: l, m, indl, ierr, NZ
2419 real(dp), parameter :: fnu=1.5d0
2420 ! Scale output
2421 if (beta .eq. zero) then
2422 dst_v = zero
2423 else
2424 dst_v = beta * dst_v
2425 end if
2426 ! In case of zero alpha nothing else is required no matter what is the
2427 ! value of the induced potential
2428 if (alpha .eq. zero) then
2429 return
2430 end if
2431 ! Get spherical coordinates
2432 if (c(1) .eq. zero) then
2433 max12 = abs(c(2))
2434 ssq12 = one
2435 else if (abs(c(2)) .gt. abs(c(1))) then
2436 max12 = abs(c(2))
2437 ssq12 = one + (c(1)/c(2))**2
2438 else
2439 max12 = abs(c(1))
2440 ssq12 = one + (c(2)/c(1))**2
2441 end if
2442 ! Then we compute rho
2443 if (c(3) .eq. zero) then
2444 rho = max12 * sqrt(ssq12)
2445 else if (abs(c(3)) .gt. max12) then
2446 rho = one + ssq12 *(max12/c(3))**2
2447 rho = abs(c(3)) * sqrt(rho)
2448 else
2449 rho = ssq12 + (c(3)/max12)**2
2450 rho = max12 * sqrt(rho)
2451 end if
2452 ! In case of a singularity (rho=zero) induced potential depends only on the
2453 ! leading 1-st spherical harmonic and the value I_0(0)=1
2454 if (rho .eq. zero) then
2455 dst_v = dst_v + alpha*src_l(1)*vscales(1)/si_ri(1)
2456 return
2457 end if
2458 ! Compute Bessel function
2459 scaling_factor = alpha * sqrt(pi / (2*rho))
2460 ! NOTE: Complex argument is required to call I_J(x)
2461 complex_argument = rho
2462 ! Compute for l = 0
2463 call cbesi(complex_argument, pt5, 1, 1, work_complex(1), nz, ierr)
2464 ! Compute for l = 1,...,p
2465 if (p .gt. 0) then
2466 call cbesi(complex_argument, fnu, 1, p, work_complex(2:p+1), nz, ierr)
2467 end if
2468 work = scaling_factor * real(work_complex) / si_ri
2469 ! Compute the actual induced potential
2470 ! Length of a vector x(1:2)
2471 stheta = max12 * sqrt(ssq12)
2472 ! Case x(1:2) != 0
2473 if (stheta .ne. zero) then
2474 ! Normalize cphi and sphi
2475 cphi = c(1) / stheta
2476 sphi = c(2) / stheta
2477 ! Normalize ctheta and stheta
2478 ctheta = c(3) / rho
2479 stheta = stheta / rho
2480 ! Treat easy cases
2481 select case(p)
2482 case (0)
2483 ! l = 0
2484 dst_v = dst_v + work(1)*vscales(1)*src_l(1)
2485 return
2486 case (1)
2487 ! l = 0
2488 tmp = work(1) * src_l(1) * vscales(1)
2489 ! l = 1
2490 tmp2 = ctheta * src_l(3) * vscales(3)
2491 tmp3 = stheta * vscales(4)
2492 tmp2 = tmp2 - tmp3*sphi*src_l(2)
2493 tmp2 = tmp2 - tmp3*cphi*src_l(4)
2494 dst_v = dst_v + tmp + work(2)*tmp2
2495 return
2496 end select
2497 ! Now p>1
2498 ! Case m = 0
2499 ! P_{l-2}^m which is P_0^0 now
2500 pl2m = one
2501 dst_v = dst_v + work(1)*src_l(1)*vscales(1)
2502 ! Update P_m^m for the next iteration
2503 pmm = -stheta
2504 ! P_{l-1}^m which is P_{m+1}^m now
2505 pl1m = ctheta
2506 ylm = pl1m * vscales(3)
2507 dst_v = dst_v + work(2)*src_l(3)*ylm
2508 ! P_l^m for l>m+1
2509 do l = 2, p
2510 plm = dble(2*l-1)*ctheta*pl1m - dble(l-1)*pl2m
2511 plm = plm / dble(l)
2512 ylm = plm * vscales(l*l+l+1)
2513 dst_v = dst_v + work(l+1)*src_l(l*l+l+1)*ylm
2514 ! Update P_{l-2}^m and P_{l-1}^m for the next iteration
2515 pl2m = pl1m
2516 pl1m = plm
2517 end do
2518 ! Prepare cos(m*phi) and sin(m*phi) for m=1
2519 cmphi = cphi
2520 smphi = sphi
2521 ! Case 0<m<p
2522 do m = 1, p-1
2523 ! P_{l-2}^m which is P_m^m now
2524 pl2m = pmm
2525 ylm = pmm * vscales((m+1)**2)
2526 tmp1 = cmphi*src_l((m+1)**2) + smphi*src_l(m*m+1)
2527 dst_v = dst_v + work(m+1)*ylm*tmp1
2528 ! Temporary to reduce number of operations
2529 tmp1 = dble(2*m+1) * pmm
2530 ! Update P_m^m for the next iteration
2531 pmm = -stheta * tmp1
2532 ! P_{l-1}^m which is P_{m+1}^m now
2533 pl1m = ctheta * tmp1
2534 ylm = pl1m * vscales((m+1)*(m+3))
2535 tmp1 = cmphi*src_l((m+1)*(m+3)) + smphi*src_l((m+1)*(m+2)+1-m)
2536 dst_v = dst_v + work(m+2)*ylm*tmp1
2537 ! P_l^m for l>m+1
2538 do l = m+2, p
2539 plm = dble(2*l-1)*ctheta*pl1m - dble(l+m-1)*pl2m
2540 plm = plm / dble(l-m)
2541 ylm = plm * vscales(l*l+l+1+m)
2542 tmp1 = cmphi*src_l(l*l+l+1+m) + smphi*src_l(l*l+l+1-m)
2543 dst_v = dst_v + work(l+1)*ylm*tmp1
2544 ! Update P_{l-2}^m and P_{l-1}^m for the next iteration
2545 pl2m = pl1m
2546 pl1m = plm
2547 end do
2548 ! Update cos(m*phi) and sin(m*phi) for the next iteration
2549 tmp1 = cmphi
2550 cmphi = cmphi*cphi - smphi*sphi
2551 smphi = tmp1*sphi + smphi*cphi
2552 end do
2553 ! Case m=p requires only to use P_m^m
2554 ylm = pmm * vscales((p+1)**2)
2555 tmp1 = cmphi*src_l((p+1)**2) + smphi*src_l(p*p+1)
2556 dst_v = dst_v + work(p+1)*ylm*tmp1
2557 ! Case of x(1:2) = 0 and x(3) != 0
2558 else
2559 ! In this case Y_l^m = 0 for m != 0, so only case m = 0 is taken into
2560 ! account. Y_l^0 = ctheta^l in this case where ctheta is either +1 or
2561 ! -1. So, we copy sign(ctheta) into rcoef. But before that we
2562 ! initialize alpha/r factor for l=0 case without taking possible sign
2563 ! into account.
2564 t = one
2565 if (c(3) .lt. zero) then
2566 ! Proceed with accumulation of a potential
2567 indl = 1
2568 do l = 0, p
2569 ! Index of Y_l^0
2570 indl = indl + 2*l
2571 ! Add 4*pi/(2*l+1)*rcoef^{l+1}*Y_l^0 contribution
2572 dst_v = dst_v + t*work(l+1)*src_l(indl)*vscales(indl)
2573 t = -t
2574 end do
2575 else
2576 ! Proceed with accumulation of a potential
2577 indl = 1
2578 do l = 0, p
2579 ! Index of Y_l^0
2580 indl = indl + 2*l
2581 ! Add 4*pi/(2*l+1)*rcoef^{l+1}*Y_l^0 contribution
2582 dst_v = dst_v + work(l+1)*src_l(indl)*vscales(indl)
2583 end do
2584 end if
2585 end if
2586end subroutine fmm_l2p_bessel_work
2587
2588subroutine fmm_l2p_bessel_grad(c, src_r, p, vscales, alpha, src_l, beta, dst_g)
2589 use complex_bessel
2590 ! Inputs
2591 integer, intent(in) :: p
2592 real(dp), intent(in) :: c(3), src_r, vscales((p+2)*(p+2)), alpha, &
2593 & src_l((p+1)*(p+1)), beta
2594 ! Output
2595 real(dp), intent(inout) :: dst_g(3)
2596 ! Temporary workspace
2597 complex(dp) :: work_complex(p+2)
2598 real(dp) :: work(p+2), src_si(p+2), src_l_grad((p+2)**2, 3)
2599 ! Call corresponding work routine
2600 call modified_spherical_bessel_first_kind(p+1, src_r, src_si, work, &
2601 & work_complex)
2602 call fmm_l2l_bessel_grad(p, src_si, vscales, src_l, src_l_grad)
2603 call fmm_l2p_bessel_work(c, p+1, vscales, src_si, -alpha, src_l_grad(:, 1), &
2604 & beta, dst_g(1), work_complex, work)
2605 call fmm_l2p_bessel_work(c, p+1, vscales, src_si, -alpha, src_l_grad(:, 2), &
2606 & beta, dst_g(2), work_complex, work)
2607 call fmm_l2p_bessel_work(c, p+1, vscales, src_si, -alpha, src_l_grad(:, 3), &
2608 & beta, dst_g(3), work_complex, work)
2609end subroutine fmm_l2p_bessel_grad
2610
2635subroutine fmm_l2p_adj(c, src_q, dst_r, p, vscales_rel, beta, dst_l)
2636 ! Inputs
2637 integer, intent(in) :: p
2638 real(dp), intent(in) :: c(3), src_q, dst_r, vscales_rel((p+1)*(p+1)), beta
2639 ! Output
2640 real(dp), intent(inout) :: dst_l((p+1)**2)
2641 ! Workspace
2642 real(dp) :: work(p+1)
2643 ! Call corresponding work routine
2644 call fmm_l2p_adj_work(c, src_q, dst_r, p, vscales_rel, beta, dst_l, work)
2645end subroutine fmm_l2p_adj
2646
2672subroutine fmm_l2p_adj_work(c, src_q, dst_r, p, vscales_rel, beta, dst_l, work)
2673 ! Inputs
2674 integer, intent(in) :: p
2675 real(dp), intent(in) :: c(3), src_q, dst_r, vscales_rel((p+1)*(p+1)), beta
2676 ! Output
2677 real(dp), intent(inout) :: dst_l((p+1)**2)
2678 ! Workspace
2679 real(dp), intent(out), target :: work(p+1)
2680 ! Local variables
2681 real(dp) :: rho, ctheta, stheta, cphi, sphi, rcoef, t, tmp, tmp1, tmp2, &
2682 & max12, ssq12, pl2m, pl1m, plm, pmm, cmphi, smphi, ylm
2683 integer :: l, m, indl
2684 ! In case src_q is zero just scale output properly
2685 if (src_q .eq. zero) then
2686 ! Zero init output if beta is also zero
2687 if (beta .eq. zero) then
2688 dst_l = zero
2689 ! Scale output by beta otherwise
2690 else
2691 dst_l = beta * dst_l
2692 end if
2693 ! Exit subroutine
2694 return
2695 end if
2696 ! Now src_q is non-zero
2697 ! Get spherical coordinates
2698 if (c(1) .eq. zero) then
2699 max12 = abs(c(2))
2700 ssq12 = one
2701 else if (abs(c(2)) .gt. abs(c(1))) then
2702 max12 = abs(c(2))
2703 ssq12 = one + (c(1)/c(2))**2
2704 else
2705 max12 = abs(c(1))
2706 ssq12 = one + (c(2)/c(1))**2
2707 end if
2708 ! Then we compute rho
2709 if (c(3) .eq. zero) then
2710 rho = max12 * sqrt(ssq12)
2711 else if (abs(c(3)) .gt. max12) then
2712 rho = one + ssq12 *(max12/c(3))**2
2713 rho = abs(c(3)) * sqrt(rho)
2714 else
2715 rho = ssq12 + (c(3)/max12)**2
2716 rho = max12 * sqrt(rho)
2717 end if
2718 ! In case of a singularity (rho=zero) induced potential depends only on the
2719 ! leading 1-st spherical harmonic, so in adjoint way we update only the leading
2720 if (rho .eq. zero) then
2721 if (beta .eq. zero) then
2722 dst_l(1) = src_q * vscales_rel(1)
2723 dst_l(2:) = zero
2724 else
2725 dst_l(1) = beta*dst_l(1) + src_q*vscales_rel(1)
2726 dst_l(2:) = beta * dst_l(2:)
2727 end if
2728 return
2729 end if
2730 ! Compute the actual induced potential
2731 rcoef = rho / dst_r
2732 ! Length of a vector x(1:2)
2733 stheta = max12 * sqrt(ssq12)
2734 ! Case x(1:2) != 0
2735 if (stheta .ne. zero) then
2736 ! Normalize cphi and sphi
2737 cphi = c(1) / stheta
2738 sphi = c(2) / stheta
2739 ! Normalize ctheta and stheta
2740 ctheta = c(3) / rho
2741 stheta = stheta / rho
2742 ! Treat easy cases
2743 select case(p)
2744 case (0)
2745 ! l = 0
2746 if (beta .eq. zero) then
2747 dst_l(1) = src_q * vscales_rel(1)
2748 else
2749 dst_l(1) = beta*dst_l(1) + src_q*vscales_rel(1)
2750 end if
2751 return
2752 case (1)
2753 ! l = 0 and l = 1
2754 tmp = src_q * rcoef
2755 tmp2 = -tmp * vscales_rel(4) * stheta
2756 if (beta .eq. zero) then
2757 dst_l(1) = src_q * vscales_rel(1)
2758 dst_l(2) = tmp2 * sphi
2759 dst_l(3) = tmp * vscales_rel(3) * ctheta
2760 dst_l(4) = tmp2 * cphi
2761 else
2762 dst_l(1) = beta*dst_l(1) + src_q*vscales_rel(1)
2763 dst_l(2) = beta*dst_l(2) + tmp2*sphi
2764 dst_l(3) = beta*dst_l(3) + tmp*vscales_rel(3)*ctheta
2765 dst_l(4) = beta*dst_l(4) + tmp2*cphi
2766 end if
2767 return
2768 end select
2769 ! Now p>1
2770 ! Precompute src_q*rcoef^l
2771 work(1) = src_q
2772 do l = 1, p
2773 work(l+1) = rcoef * work(l)
2774 end do
2775 ! Overwrite output
2776 if (beta .eq. zero) then
2777 ! Case m = 0
2778 ! P_{l-2}^m which is P_0^0 now
2779 pl2m = one
2780 dst_l(1) = work(1) * vscales_rel(1)
2781 ! Update P_m^m for the next iteration
2782 pmm = -stheta
2783 ! P_{l-1}^m which is P_{m+1}^m now
2784 pl1m = ctheta
2785 ylm = pl1m * vscales_rel(3)
2786 dst_l(3) = work(2) * ylm
2787 ! P_l^m for l>m+1
2788 do l = 2, p
2789 plm = dble(2*l-1)*ctheta*pl1m - dble(l-1)*pl2m
2790 plm = plm / dble(l)
2791 ylm = plm * vscales_rel(l*l+l+1)
2792 dst_l(l*l+l+1) = work(l+1) * ylm
2793 ! Update P_{l-2}^m and P_{l-1}^m for the next iteration
2794 pl2m = pl1m
2795 pl1m = plm
2796 end do
2797 ! Prepare cos(m*phi) and sin(m*phi) for m=1
2798 cmphi = cphi
2799 smphi = sphi
2800 ! Case 0<m<p
2801 do m = 1, p-1
2802 ! P_{l-2}^m which is P_m^m now
2803 pl2m = pmm
2804 ylm = pmm * vscales_rel((m+1)**2)
2805 ylm = work(m+1) * ylm
2806 dst_l((m+1)**2) = cmphi * ylm
2807 dst_l(m*m+1) = smphi * ylm
2808 ! Temporary to reduce number of operations
2809 tmp1 = dble(2*m+1) * pmm
2810 ! Update P_m^m for the next iteration
2811 pmm = -stheta * tmp1
2812 ! P_{l-1}^m which is P_{m+1}^m now
2813 pl1m = ctheta * tmp1
2814 ylm = pl1m * vscales_rel((m+1)*(m+3))
2815 ylm = work(m+2) * ylm
2816 dst_l((m+1)*(m+3)) = cmphi * ylm
2817 dst_l((m+1)*(m+2)+1-m) = smphi * ylm
2818 ! P_l^m for l>m+1
2819 do l = m+2, p
2820 plm = dble(2*l-1)*ctheta*pl1m - dble(l+m-1)*pl2m
2821 plm = plm / dble(l-m)
2822 ylm = plm * vscales_rel(l*l+l+1+m)
2823 ylm = work(l+1) * ylm
2824 dst_l(l*l+l+1+m) = cmphi * ylm
2825 dst_l(l*l+l+1-m) = smphi * ylm
2826 ! Update P_{l-2}^m and P_{l-1}^m for the next iteration
2827 pl2m = pl1m
2828 pl1m = plm
2829 end do
2830 ! Update cos(m*phi) and sin(m*phi) for the next iteration
2831 tmp1 = cmphi
2832 cmphi = cmphi*cphi - smphi*sphi
2833 smphi = tmp1*sphi + smphi*cphi
2834 end do
2835 ! Case m=p requires only to use P_m^m
2836 ylm = pmm * vscales_rel((p+1)**2)
2837 ylm = work(p+1) * ylm
2838 dst_l((p+1)**2) = cmphi * ylm
2839 dst_l(p*p+1) = smphi * ylm
2840 ! Update output
2841 else
2842 ! Case m = 0
2843 ! P_{l-2}^m which is P_0^0 now
2844 pl2m = one
2845 dst_l(1) = beta*dst_l(1) + work(1)*vscales_rel(1)
2846 ! Update P_m^m for the next iteration
2847 pmm = -stheta
2848 ! P_{l-1}^m which is P_{m+1}^m now
2849 pl1m = ctheta
2850 ylm = pl1m * vscales_rel(3)
2851 dst_l(3) = beta*dst_l(3) + work(2)*ylm
2852 ! P_l^m for l>m+1
2853 do l = 2, p
2854 plm = dble(2*l-1)*ctheta*pl1m - dble(l-1)*pl2m
2855 plm = plm / dble(l)
2856 ylm = plm * vscales_rel(l*l+l+1)
2857 dst_l(l*l+l+1) = beta*dst_l(l*l+l+1) + work(l+1)*ylm
2858 ! Update P_{l-2}^m and P_{l-1}^m for the next iteration
2859 pl2m = pl1m
2860 pl1m = plm
2861 end do
2862 ! Prepare cos(m*phi) and sin(m*phi) for m=1
2863 cmphi = cphi
2864 smphi = sphi
2865 ! Case 0<m<p
2866 do m = 1, p-1
2867 ! P_{l-2}^m which is P_m^m now
2868 pl2m = pmm
2869 ylm = pmm * vscales_rel((m+1)**2)
2870 ylm = work(m+1) * ylm
2871 dst_l((m+1)**2) = beta*dst_l((m+1)**2) + cmphi*ylm
2872 dst_l(m*m+1) = beta*dst_l(m*m+1) + smphi*ylm
2873 ! Temporary to reduce number of operations
2874 tmp1 = dble(2*m+1) * pmm
2875 ! Update P_m^m for the next iteration
2876 pmm = -stheta * tmp1
2877 ! P_{l-1}^m which is P_{m+1}^m now
2878 pl1m = ctheta * tmp1
2879 ylm = pl1m * vscales_rel((m+1)*(m+3))
2880 ylm = work(m+2) * ylm
2881 dst_l((m+1)*(m+3)) = beta*dst_l((m+1)*(m+3)) + cmphi*ylm
2882 dst_l((m+1)*(m+2)+1-m) = beta*dst_l((m+1)*(m+2)+1-m) + &
2883 & smphi*ylm
2884 ! P_l^m for l>m+1
2885 do l = m+2, p
2886 plm = dble(2*l-1)*ctheta*pl1m - dble(l+m-1)*pl2m
2887 plm = plm / dble(l-m)
2888 ylm = plm * vscales_rel(l*l+l+1+m)
2889 ylm = work(l+1) * ylm
2890 dst_l(l*l+l+1+m) = beta*dst_l(l*l+l+1+m) + cmphi*ylm
2891 dst_l(l*l+l+1-m) = beta*dst_l(l*l+l+1-m) + smphi*ylm
2892 ! Update P_{l-2}^m and P_{l-1}^m for the next iteration
2893 pl2m = pl1m
2894 pl1m = plm
2895 end do
2896 ! Update cos(m*phi) and sin(m*phi) for the next iteration
2897 tmp1 = cmphi
2898 cmphi = cmphi*cphi - smphi*sphi
2899 smphi = tmp1*sphi + smphi*cphi
2900 end do
2901 ! Case m=p requires only to use P_m^m
2902 ylm = pmm * vscales_rel((p+1)**2)
2903 ylm = work(p+1) * ylm
2904 dst_l((p+1)**2) = beta*dst_l((p+1)**2) + cmphi*ylm
2905 dst_l(p*p+1) = beta*dst_l(p*p+1) + smphi*ylm
2906 end if
2907 ! Case of x(1:2) = 0 and x(3) != 0
2908 else
2909 ! In this case Y_l^m = 0 for m != 0, so only case m = 0 is taken into
2910 ! account. Y_l^m = ctheta^m in this case where ctheta is either +1 or
2911 ! -1. So, we copy sign(ctheta) into rcoef.
2912 rcoef = sign(rcoef, c(3))
2913 ! Init t and proceed with accumulation of a potential
2914 t = src_q
2915 indl = 1
2916 ! Overwrite output
2917 if (beta .eq. zero) then
2918 do l = 0, p
2919 ! Index of Y_l^0
2920 indl = indl + 2*l
2921 ! Add 4*pi/(2*l+1)*rcoef^{l+1}*Y_l^0 contribution
2922 dst_l(indl) = t * vscales_rel(indl)
2923 dst_l(indl-l:indl-1) = zero
2924 dst_l(indl+1:indl+l) = zero
2925 ! Update t
2926 t = t * rcoef
2927 end do
2928 ! Update output
2929 else
2930 do l = 0, p
2931 ! Index of Y_l^0
2932 indl = indl + 2*l
2933 ! Add 4*pi/(2*l+1)*rcoef^{l+1}*Y_l^0 contribution
2934 dst_l(indl) = beta*dst_l(indl) + t*vscales_rel(indl)
2935 dst_l(indl-l:indl-1) = beta * dst_l(indl-l:indl-1)
2936 dst_l(indl+1:indl+l) = beta * dst_l(indl+1:indl+l)
2937 ! Update t
2938 t = t * rcoef
2939 end do
2940 end if
2941 end if
2942end subroutine fmm_l2p_adj_work
2943
2998subroutine fmm_sph_transform(p, r1, alpha, src, beta, dst)
2999 ! Inputs
3000 integer, intent(in) :: p
3001 real(dp), intent(in) :: r1(-1:1, -1:1), alpha, src((p+1)*(p+1)), beta
3002 ! Output
3003 real(dp), intent(inout) :: dst((p+1)*(p+1))
3004 ! Temporary workspace
3005 real(dp) :: work(2*(2*p+1)*(2*p+3))
3006 ! Call corresponding work routine
3007 call fmm_sph_transform_work(p, r1, alpha, src, beta, dst, work)
3008end subroutine fmm_sph_transform
3009
3023subroutine fmm_sph_transform_work(p, r1, alpha, src, beta, dst, work)
3024 ! Inputs
3025 integer, intent(in) :: p
3026 real(dp), intent(in) :: r1(-1:1, -1:1), alpha, src((p+1)*(p+1)), beta
3027 ! Output
3028 real(dp), intent(out) :: dst((p+1)*(p+1))
3029 ! Temporary workspace
3030 real(dp), intent(out), target :: work(2*(2*p+1)*(2*p+3))
3031 ! Local variables
3032 real(dp) :: u, v, w
3033 integer :: l, m, n, ind
3034 ! Pointers for a workspace
3035 real(dp), pointer :: r_prev(:, :), r(:, :), scal_uvw_m(:), scal_u_n(:), &
3036 & scal_v_n(:), scal_w_n(:), r_swap(:, :)
3037 ! In case alpha is zero just scale output
3038 if (alpha .eq. zero) then
3039 ! Set output to zero if beta is also zero
3040 if (beta .eq. zero) then
3041 dst = zero
3042 else
3043 dst = beta * dst
3044 end if
3045 ! Exit subroutine
3046 return
3047 end if
3048 ! Now alpha is non-zero
3049 ! In case beta is zero output is just overwritten without being read
3050 if (beta .eq. zero) then
3051 ! Compute rotations/reflections
3052 ! l = 0
3053 dst(1) = alpha * src(1)
3054 if (p .eq. 0) then
3055 return
3056 end if
3057 ! l = 1
3058 dst(2) = alpha * dot_product(r1(-1:1,-1), src(2:4))
3059 dst(3) = alpha * dot_product(r1(-1:1,0), src(2:4))
3060 dst(4) = alpha * dot_product(r1(-1:1,1), src(2:4))
3061 if (p .eq. 1) then
3062 return
3063 end if
3064 ! Set pointers
3065 n = 2*p + 1
3066 l = n ** 2
3067 r_prev(-p:p, -p:p) => work(1:l)
3068 m = 2*l
3069 r(-p:p, -p:p) => work(l+1:m)
3070 l = m + n
3071 scal_uvw_m(-p:p) => work(m+1:l)
3072 m = l + n
3073 scal_u_n(-p:p) => work(l+1:m)
3074 l = m + n
3075 scal_v_n(-p:p) => work(m+1:l)
3076 m = l + n
3077 scal_w_n(-p:p) => work(l+1:m)
3078 ! l = 2
3079 r(2, 2) = (r1(1, 1)*r1(1, 1) - r1(1, -1)*r1(1, -1) - &
3080 & r1(-1, 1)*r1(-1, 1) + r1(-1, -1)*r1(-1, -1)) / two
3081 r(2, 1) = r1(1, 1)*r1(1, 0) - r1(-1, 1)*r1(-1, 0)
3082 r(2, 0) = sqrt3 / two * (r1(1, 0)*r1(1, 0) - r1(-1, 0)*r1(-1, 0))
3083 r(2, -1) = r1(1, -1)*r1(1, 0) - r1(-1, -1)*r1(-1, 0)
3084 r(2, -2) = r1(1, 1)*r1(1, -1) - r1(-1, 1)*r1(-1, -1)
3085 r(1, 2) = r1(1, 1)*r1(0, 1) - r1(1, -1)*r1(0, -1)
3086 r(1, 1) = r1(1, 1)*r1(0, 0) + r1(1, 0)*r1(0, 1)
3087 r(1, 0) = sqrt3 * r1(1, 0) * r1(0, 0)
3088 r(1, -1) = r1(1, -1)*r1(0, 0) + r1(1, 0)*r1(0, -1)
3089 r(1, -2) = r1(1, 1)*r1(0, -1) + r1(1, -1)*r1(0, 1)
3090 r(0, 2) = sqrt3 / two * (r1(0, 1)*r1(0, 1) - r1(0, -1)*r1(0, -1))
3091 r(0, 1) = sqrt3 * r1(0, 1) * r1(0, 0)
3092 r(0, 0) = (three*r1(0, 0)*r1(0, 0)-one) / two
3093 r(0, -1) = sqrt3 * r1(0, -1) * r1(0, 0)
3094 r(0, -2) = sqrt3 * r1(0, 1) * r1(0, -1)
3095 r(-1, 2) = r1(-1, 1)*r1(0, 1) - r1(-1, -1)*r1(0, -1)
3096 r(-1, 1) = r1(-1, 1)*r1(0, 0) + r1(-1, 0)*r1(0, 1)
3097 r(-1, 0) = sqrt3 * r1(-1, 0) * r1(0, 0)
3098 r(-1, -1) = r1(-1, -1)*r1(0, 0) + r1(0, -1)*r1(-1, 0)
3099 r(-1, -2) = r1(-1, 1)*r1(0, -1) + r1(-1, -1)*r1(0, 1)
3100 r(-2, 2) = r1(1, 1)*r1(-1, 1) - r1(1, -1)*r1(-1, -1)
3101 r(-2, 1) = r1(1, 1)*r1(-1, 0) + r1(1, 0)*r1(-1, 1)
3102 r(-2, 0) = sqrt3 * r1(1, 0) * r1(-1, 0)
3103 r(-2, -1) = r1(1, -1)*r1(-1, 0) + r1(1, 0)*r1(-1, -1)
3104 r(-2, -2) = r1(1, 1)*r1(-1, -1) + r1(1, -1)*r1(-1, 1)
3105 do m = -2, 2
3106 dst(7+m) = alpha * dot_product(r(-2:2, m), src(5:9))
3107 end do
3108 ! l > 2
3109 do l = 3, p
3110 ! Swap previous and current rotation matrices
3111 r_swap => r_prev
3112 r_prev => r
3113 r => r_swap
3114 ! Prepare scalar factors
3115 scal_uvw_m(0) = dble(l)
3116 do m = 1, l-1
3117 u = sqrt(dble(l*l-m*m))
3118 scal_uvw_m(m) = u
3119 scal_uvw_m(-m) = u
3120 end do
3121 u = two * dble(l)
3122 u = sqrt(dble(u*(u-one)))
3123 scal_uvw_m(l) = u
3124 scal_uvw_m(-l) = u
3125 scal_u_n(0) = dble(l)
3126 scal_v_n(0) = -sqrt(dble(l*(l-1))) / sqrt2
3127 scal_w_n(0) = zero
3128 do n = 1, l-2
3129 u = sqrt(dble(l*l-n*n))
3130 scal_u_n(n) = u
3131 scal_u_n(-n) = u
3132 v = dble(l+n)
3133 v = sqrt(v*(v-one)) / two
3134 scal_v_n(n) = v
3135 scal_v_n(-n) = v
3136 w = dble(l-n)
3137 w = -sqrt(w*(w-one)) / two
3138 scal_w_n(n) = w
3139 scal_w_n(-n) = w
3140 end do
3141 u = sqrt(dble(2*l-1))
3142 scal_u_n(l-1) = u
3143 scal_u_n(1-l) = u
3144 scal_u_n(l) = zero
3145 scal_u_n(-l) = zero
3146 v = sqrt(dble((2*l-1)*(2*l-2))) / two
3147 scal_v_n(l-1) = v
3148 scal_v_n(1-l) = v
3149 v = sqrt(dble(2*l*(2*l-1))) / two
3150 scal_v_n(l) = v
3151 scal_v_n(-l) = v
3152 scal_w_n(l-1) = zero
3153 scal_w_n(l) = zero
3154 scal_w_n(-l) = zero
3155 scal_w_n(1-l) = zero
3156 ind = l*l + l + 1
3157 ! m = l, n = l
3158 v = r1(1, 1)*r_prev(l-1, l-1) - r1(1, -1)*r_prev(l-1, 1-l) - &
3159 & r1(-1, 1)*r_prev(1-l, l-1) + r1(-1, -1)*r_prev(1-l, 1-l)
3160 r(l, l) = v * scal_v_n(l) / scal_uvw_m(l)
3161 !dst(ind+l) = src(ind+l) * r(l, l)
3162 ! m = l, n = -l
3163 v = r1(1, 1)*r_prev(1-l, l-1) - r1(1, -1)*r_prev(1-l, 1-l) + &
3164 & r1(-1, 1)*r_prev(l-1, l-1) - r1(-1, -1)*r_prev(l-1, 1-l)
3165 r(-l, l) = v * scal_v_n(l) / scal_uvw_m(l)
3166 !dst(ind+l) = dst(ind+l) + src(ind-l)*r(-l, l)
3167 ! m = l, n = l-1
3168 u = r1(0, 1)*r_prev(l-1, l-1) - r1(0, -1)*r_prev(l-1, 1-l)
3169 v = r1(1, 1)*r_prev(l-2, l-1) - r1(1, -1)*r_prev(l-2, 1-l) - &
3170 & r1(-1, 1)*r_prev(2-l, l-1) + r1(-1, -1)*r_prev(2-l, 1-l)
3171 r(l-1, l) = (u*scal_u_n(l-1)+v*scal_v_n(l-1)) / scal_uvw_m(l)
3172 !dst(ind+l) = dst(ind+l) + src(ind+l-1)*r(l-1, l)
3173 ! m = l, n = 1-l
3174 u = r1(0, 1)*r_prev(1-l, l-1) - r1(0, -1)*r_prev(1-l, 1-l)
3175 v = r1(1, 1)*r_prev(2-l, l-1) - r1(1, -1)*r_prev(2-l, 1-l) + &
3176 & r1(-1, 1)*r_prev(l-2, l-1) - r1(-1, -1)*r_prev(l-2, 1-l)
3177 r(1-l, l) = (u*scal_u_n(l-1)+v*scal_v_n(l-1)) / scal_uvw_m(l)
3178 !dst(ind+l) = dst(ind+l) + src(ind+1-l)*r(1-l, l)
3179 ! m = l, n = 1
3180 u = r1(0, 1)*r_prev(1, l-1) - r1(0, -1)*r_prev(1, 1-l)
3181 v = r1(1, 1)*r_prev(0, l-1) - r1(1, -1)*r_prev(0, 1-l)
3182 w = r1(1, 1)*r_prev(2, l-1) - r1(1, -1)*r_prev(2, 1-l) + &
3183 & r1(-1, 1)*r_prev(-2, l-1) - r1(-1, -1)*r_prev(-2, 1-l)
3184 r(1, l) = u*scal_u_n(1) + sqrt2*v*scal_v_n(1) + w*scal_w_n(1)
3185 r(1, l) = r(1, l) / scal_uvw_m(l)
3186 !dst(ind+l) = dst(ind+l) + src(ind+1)*r(1, l)
3187 ! m = l, n = -1
3188 u = r1(0, 1)*r_prev(-1, l-1) - r1(0, -1)*r_prev(-1, 1-l)
3189 v = r1(-1, 1)*r_prev(0, l-1) - r1(-1, -1)*r_prev(0, 1-l)
3190 w = r1(1, 1)*r_prev(-2, l-1) - r1(1, -1)*r_prev(-2, 1-l) - &
3191 & r1(-1, 1)*r_prev(2, l-1) + r1(-1, -1)*r_prev(2, 1-l)
3192 r(-1, l) = u*scal_u_n(1) + sqrt2*v*scal_v_n(1) + w*scal_w_n(1)
3193 r(-1, l) = r(-1, l) / scal_uvw_m(l)
3194 !dst(ind+l) = dst(ind+l) + src(ind-1)*r(-1, l)
3195 ! m = l, n = 0
3196 u = r1(0, 1)*r_prev(0, l-1) - r1(0, -1)*r_prev(0, 1-l)
3197 v = r1(1, 1)*r_prev(1, l-1) - r1(1, -1)*r_prev(1, 1-l) + &
3198 & r1(-1, 1)*r_prev(-1, l-1) - r1(-1, -1)*r_prev(-1, 1-l)
3199 r(0, l) = (u*scal_u_n(0) + v*scal_v_n(0)) / scal_uvw_m(l)
3200 !dst(ind+l) = dst(ind+l) + src(ind)*r(0, l)
3201 ! m = l, n = 2-l..l-2, n != -1,0,1
3202 do n = 2, l-2
3203 u = r1(0, 1)*r_prev(n, l-1) - r1(0, -1)*r_prev(n, 1-l)
3204 v = r1(1, 1)*r_prev(n-1, l-1) - r1(1, -1)*r_prev(n-1, 1-l) - &
3205 & r1(-1, 1)*r_prev(1-n, l-1) + r1(-1, -1)*r_prev(1-n, 1-l)
3206 w = r1(1, 1)*r_prev(n+1, l-1) - r1(1, -1)*r_prev(n+1, 1-l) + &
3207 & r1(-1, 1)*r_prev(-n-1, l-1) - r1(-1, -1)*r_prev(-n-1, 1-l)
3208 r(n, l) = u*scal_u_n(n) + v*scal_v_n(n) + w*scal_w_n(n)
3209 r(n, l) = r(n, l) / scal_uvw_m(l)
3210 !dst(ind+l) = dst(ind+l) + src(ind+n)*r(n, l)
3211 u = r1(0, 1)*r_prev(-n, l-1) - r1(0, -1)*r_prev(-n, 1-l)
3212 v = r1(1, 1)*r_prev(1-n, l-1) - r1(1, -1)*r_prev(1-n, 1-l) + &
3213 & r1(-1, 1)*r_prev(n-1, l-1) - r1(-1, -1)*r_prev(n-1, 1-l)
3214 w = r1(1, 1)*r_prev(-n-1, l-1) - r1(1, -1)*r_prev(-n-1, 1-l) - &
3215 & r1(-1, 1)*r_prev(n+1, l-1) + r1(-1, -1)*r_prev(n+1, 1-l)
3216 r(-n, l) = u*scal_u_n(n) + v*scal_v_n(n) + w*scal_w_n(n)
3217 r(-n, l) = r(-n, l) / scal_uvw_m(l)
3218 !dst(ind+l) = dst(ind+l) + src(ind-n)*r(-n, l)
3219 end do
3220 dst(ind+l) = alpha * dot_product(src(ind-l:ind+l), r(-l:l, l))
3221 ! m = -l, n = l
3222 v = r1(1, 1)*r_prev(l-1, 1-l) + r1(1, -1)*r_prev(l-1, l-1) - &
3223 & r1(-1, 1)*r_prev(1-l, 1-l) - r1(-1, -1)*r_prev(1-l, l-1)
3224 r(l, -l) = v * scal_v_n(l) / scal_uvw_m(l)
3225 !dst(ind-l) = src(ind+l)*r(l, -l)
3226 ! m = -l, n = -l
3227 v = r1(1, 1)*r_prev(1-l, 1-l) + r1(1, -1)*r_prev(1-l, l-1) + &
3228 & r1(-1, 1)*r_prev(l-1, 1-l) + r1(-1, -1)*r_prev(l-1, l-1)
3229 r(-l, -l) = v * scal_v_n(l) / scal_uvw_m(l)
3230 !dst(ind-l) = dst(ind-l) + src(ind-l)*r(-l, -l)
3231 ! m = -l, n = l-1
3232 u = r1(0, 1)*r_prev(l-1, 1-l) + r1(0, -1)*r_prev(l-1, l-1)
3233 v = r1(1, 1)*r_prev(l-2, 1-l) + r1(1, -1)*r_prev(l-2, l-1) - &
3234 & r1(-1, 1)*r_prev(2-l, 1-l) - r1(-1, -1)*r_prev(2-l, l-1)
3235 r(l-1, -l) = (u*scal_u_n(l-1)+v*scal_v_n(l-1)) / scal_uvw_m(l)
3236 !dst(ind-l) = dst(ind-l) + src(ind+l-1)*r(l-1, -l)
3237 ! m = -l, n = 1-l
3238 u = r1(0, 1)*r_prev(1-l, 1-l) + r1(0, -1)*r_prev(1-l, l-1)
3239 v = r1(1, 1)*r_prev(2-l, 1-l) + r1(1, -1)*r_prev(2-l, l-1) + &
3240 & r1(-1, 1)*r_prev(l-2, 1-l) + r1(-1, -1)*r_prev(l-2, l-1)
3241 r(1-l, -l) = (u*scal_u_n(l-1)+v*scal_v_n(l-1)) / scal_uvw_m(l)
3242 !dst(ind-l) = dst(ind-l) + src(ind+1-l)*r(1-l, -l)
3243 ! m = -l, n = 1
3244 u = r1(0, 1)*r_prev(1, 1-l) + r1(0, -1)*r_prev(1, l-1)
3245 v = r1(1, 1)*r_prev(0, 1-l) + r1(1, -1)*r_prev(0, l-1)
3246 w = r1(1, 1)*r_prev(2, 1-l) + r1(1, -1)*r_prev(2, l-1) + &
3247 & r1(-1, 1)*r_prev(-2, 1-l) + r1(-1, -1)*r_prev(-2, l-1)
3248 r(1, -l) = u*scal_u_n(1) + sqrt2*v*scal_v_n(1) + w*scal_w_n(1)
3249 r(1, -l) = r(1, -l) / scal_uvw_m(l)
3250 !dst(ind-l) = dst(ind-l) + src(ind+1)*r(1, -l)
3251 ! m = -l, n = -1
3252 u = r1(0, 1)*r_prev(-1, 1-l) + r1(0, -1)*r_prev(-1, l-1)
3253 v = r1(-1, 1)*r_prev(0, 1-l) + r1(-1, -1)*r_prev(0, l-1)
3254 w = r1(1, 1)*r_prev(-2, 1-l) + r1(1, -1)*r_prev(-2, l-1) - &
3255 & r1(-1, 1)*r_prev(2, 1-l) - r1(-1, -1)*r_prev(2, l-1)
3256 r(-1, -l) = u*scal_u_n(1) + sqrt2*v*scal_v_n(1) + w*scal_w_n(1)
3257 r(-1, -l) = r(-1, -l) / scal_uvw_m(l)
3258 !dst(ind-l) = dst(ind-l) + src(ind-1)*r(-1, -l)
3259 ! m = -l, n = 0
3260 u = r1(0, 1)*r_prev(0, 1-l) + r1(0, -1)*r_prev(0, l-1)
3261 v = r1(1, 1)*r_prev(1, 1-l) + r1(1, -1)*r_prev(1, l-1) + &
3262 & r1(-1, 1)*r_prev(-1, 1-l) + r1(-1, -1)*r_prev(-1, l-1)
3263 r(0, -l) = (u*scal_u_n(0) + v*scal_v_n(0)) / scal_uvw_m(l)
3264 !dst(ind-l) = dst(ind-l) + src(ind)*r(0, -l)
3265 ! m = -l, n = 2-l..l-2, n != -1,0,1
3266 do n = 2, l-2
3267 u = r1(0, 1)*r_prev(n, 1-l) + r1(0, -1)*r_prev(n, l-1)
3268 v = r1(1, 1)*r_prev(n-1, 1-l) + r1(1, -1)*r_prev(n-1, l-1) - &
3269 & r1(-1, 1)*r_prev(1-n, 1-l) - r1(-1, -1)*r_prev(1-n, l-1)
3270 w = r1(1, 1)*r_prev(n+1, 1-l) + r1(1, -1)*r_prev(n+1, l-1) + &
3271 & r1(-1, 1)*r_prev(-n-1, 1-l) + r1(-1, -1)*r_prev(-n-1, l-1)
3272 r(n, -l) = u*scal_u_n(n) + v*scal_v_n(n) + w*scal_w_n(n)
3273 r(n, -l) = r(n, -l) / scal_uvw_m(l)
3274 !dst(ind-l) = dst(ind-l) + src(ind+n)*r(n, -l)
3275 u = r1(0, 1)*r_prev(-n, 1-l) + r1(0, -1)*r_prev(-n, l-1)
3276 v = r1(1, 1)*r_prev(1-n, 1-l) + r1(1, -1)*r_prev(1-n, l-1) + &
3277 & r1(-1, 1)*r_prev(n-1, 1-l) + r1(-1, -1)*r_prev(n-1, l-1)
3278 w = r1(1, 1)*r_prev(-n-1, 1-l) + r1(1, -1)*r_prev(-n-1, l-1) - &
3279 & r1(-1, 1)*r_prev(n+1, 1-l) - r1(-1, -1)*r_prev(n+1, l-1)
3280 r(-n, -l) = u*scal_u_n(n) + v*scal_v_n(n) + w*scal_w_n(n)
3281 r(-n, -l) = r(-n, -l) / scal_uvw_m(l)
3282 !dst(ind-l) = dst(ind-l) + src(ind-n)*r(-n, -l)
3283 end do
3284 dst(ind-l) = alpha * dot_product(src(ind-l:ind+l), r(-l:l, -l))
3285 ! Now deal with m=1-l..l-1
3286 do m = 1-l, l-1
3287 ! n = l
3288 v = r1(1, 0)*r_prev(l-1, m) - r1(-1, 0)*r_prev(1-l, m)
3289 r(l, m) = v * scal_v_n(l) / scal_uvw_m(m)
3290 !dst(ind+m) = src(ind+l) * r(l, m)
3291 ! n = -l
3292 v = r1(1, 0)*r_prev(1-l, m) + r1(-1, 0)*r_prev(l-1, m)
3293 r(-l, m) = v * scal_v_n(l) / scal_uvw_m(m)
3294 !dst(ind+m) = dst(ind+m) + src(ind-l)*r(-l, m)
3295 ! n = l-1
3296 u = r1(0, 0) * r_prev(l-1, m)
3297 v = r1(1, 0)*r_prev(l-2, m) - r1(-1, 0)*r_prev(2-l, m)
3298 r(l-1, m) = u*scal_u_n(l-1) + v*scal_v_n(l-1)
3299 r(l-1, m) = r(l-1, m) / scal_uvw_m(m)
3300 !dst(ind+m) = dst(ind+m) + src(ind+l-1)*r(l-1, m)
3301 ! n = 1-l
3302 u = r1(0, 0) * r_prev(1-l, m)
3303 v = r1(1, 0)*r_prev(2-l, m) + r1(-1, 0)*r_prev(l-2, m)
3304 r(1-l, m) = u*scal_u_n(l-1) + v*scal_v_n(l-1)
3305 r(1-l, m) = r(1-l, m) / scal_uvw_m(m)
3306 !dst(ind+m) = dst(ind+m) + src(ind+1-l)*r(1-l, m)
3307 ! n = 0
3308 u = r1(0, 0) * r_prev(0, m)
3309 v = r1(1, 0)*r_prev(1, m) + r1(-1, 0)*r_prev(-1, m)
3310 r(0, m) = u*scal_u_n(0) + v*scal_v_n(0)
3311 r(0, m) = r(0, m) / scal_uvw_m(m)
3312 !dst(ind+m) = dst(ind+m) + src(ind)*r(0, m)
3313 ! n = 1
3314 u = r1(0, 0) * r_prev(1, m)
3315 v = r1(1, 0) * r_prev(0, m)
3316 w = r1(1, 0)*r_prev(2, m) + r1(-1, 0)*r_prev(-2, m)
3317 r(1, m) = u*scal_u_n(1) + sqrt2*v*scal_v_n(1) + w*scal_w_n(1)
3318 r(1, m) = r(1, m) / scal_uvw_m(m)
3319 !dst(ind+m) = dst(ind+m) + src(ind+1)*r(1, m)
3320 ! n = -1
3321 u = r1(0, 0) * r_prev(-1, m)
3322 v = r1(-1, 0) * r_prev(0, m)
3323 w = r1(1, 0)*r_prev(-2, m) - r1(-1, 0)*r_prev(2, m)
3324 r(-1, m) = u*scal_u_n(1) + sqrt2*v*scal_v_n(1) + w*scal_w_n(1)
3325 r(-1, m) = r(-1, m) / scal_uvw_m(m)
3326 !dst(ind+m) = dst(ind+m) + src(ind-1)*r(-1, m)
3327 ! n = 2-l..l-2, n != -1,0,1
3328 do n = 2, l-2
3329 u = r1(0, 0) * r_prev(n, m)
3330 v = r1(1, 0)*r_prev(n-1, m) - r1(-1, 0)*r_prev(1-n, m)
3331 w = r1(1, 0)*r_prev(n+1, m) + r1(-1, 0)*r_prev(-1-n, m)
3332 r(n, m) = u*scal_u_n(n) + v*scal_v_n(n) + w*scal_w_n(n)
3333 r(n, m) = r(n, m) / scal_uvw_m(m)
3334 !dst(ind+m) = dst(ind+m) + src(ind+n)*r(n, m)
3335 u = r1(0, 0) * r_prev(-n, m)
3336 v = r1(1, 0)*r_prev(1-n, m) + r1(-1, 0)*r_prev(n-1, m)
3337 w = r1(1, 0)*r_prev(-n-1, m) - r1(-1, 0)*r_prev(n+1, m)
3338 r(-n, m) = u*scal_u_n(n) + v*scal_v_n(n) + w*scal_w_n(n)
3339 r(-n, m) = r(-n, m) / scal_uvw_m(m)
3340 !dst(ind+m) = dst(ind+m) + src(ind-n)*r(-n, m)
3341 end do
3342 dst(ind+m) = alpha * dot_product(src(ind-l:ind+l), r(-l:l, m))
3343 end do
3344 end do
3345 else
3346 ! Compute rotations/reflections
3347 ! l = 0
3348 dst(1) = beta*dst(1) + alpha*src(1)
3349 if (p .eq. 0) then
3350 return
3351 end if
3352 ! l = 1
3353 dst(2) = beta*dst(2) + alpha*dot_product(r1(-1:1,-1), src(2:4))
3354 dst(3) = beta*dst(3) + alpha*dot_product(r1(-1:1,0), src(2:4))
3355 dst(4) = beta*dst(4) + alpha*dot_product(r1(-1:1,1), src(2:4))
3356 if (p .eq. 1) then
3357 return
3358 end if
3359 ! Set pointers
3360 n = 2*p + 1
3361 l = n ** 2
3362 r_prev(-p:p, -p:p) => work(1:l)
3363 m = 2*l
3364 r(-p:p, -p:p) => work(l+1:m)
3365 l = m + n
3366 scal_uvw_m(-p:p) => work(m+1:l)
3367 m = l + n
3368 scal_u_n(-p:p) => work(l+1:m)
3369 l = m + n
3370 scal_v_n(-p:p) => work(m+1:l)
3371 m = l + n
3372 scal_w_n(-p:p) => work(l+1:m)
3373 ! l = 2
3374 r(2, 2) = (r1(1, 1)*r1(1, 1) - r1(1, -1)*r1(1, -1) - &
3375 & r1(-1, 1)*r1(-1, 1) + r1(-1, -1)*r1(-1, -1)) / two
3376 r(2, 1) = r1(1, 1)*r1(1, 0) - r1(-1, 1)*r1(-1, 0)
3377 r(2, 0) = sqrt3 / two * (r1(1, 0)*r1(1, 0) - r1(-1, 0)*r1(-1, 0))
3378 r(2, -1) = r1(1, -1)*r1(1, 0) - r1(-1, -1)*r1(-1, 0)
3379 r(2, -2) = r1(1, 1)*r1(1, -1) - r1(-1, 1)*r1(-1, -1)
3380 r(1, 2) = r1(1, 1)*r1(0, 1) - r1(1, -1)*r1(0, -1)
3381 r(1, 1) = r1(1, 1)*r1(0, 0) + r1(1, 0)*r1(0, 1)
3382 r(1, 0) = sqrt3 * r1(1, 0) * r1(0, 0)
3383 r(1, -1) = r1(1, -1)*r1(0, 0) + r1(1, 0)*r1(0, -1)
3384 r(1, -2) = r1(1, 1)*r1(0, -1) + r1(1, -1)*r1(0, 1)
3385 r(0, 2) = sqrt3 / two * (r1(0, 1)*r1(0, 1) - r1(0, -1)*r1(0, -1))
3386 r(0, 1) = sqrt3 * r1(0, 1) * r1(0, 0)
3387 r(0, 0) = (three*r1(0, 0)*r1(0, 0)-one) / two
3388 r(0, -1) = sqrt3 * r1(0, -1) * r1(0, 0)
3389 r(0, -2) = sqrt3 * r1(0, 1) * r1(0, -1)
3390 r(-1, 2) = r1(-1, 1)*r1(0, 1) - r1(-1, -1)*r1(0, -1)
3391 r(-1, 1) = r1(-1, 1)*r1(0, 0) + r1(-1, 0)*r1(0, 1)
3392 r(-1, 0) = sqrt3 * r1(-1, 0) * r1(0, 0)
3393 r(-1, -1) = r1(-1, -1)*r1(0, 0) + r1(0, -1)*r1(-1, 0)
3394 r(-1, -2) = r1(-1, 1)*r1(0, -1) + r1(-1, -1)*r1(0, 1)
3395 r(-2, 2) = r1(1, 1)*r1(-1, 1) - r1(1, -1)*r1(-1, -1)
3396 r(-2, 1) = r1(1, 1)*r1(-1, 0) + r1(1, 0)*r1(-1, 1)
3397 r(-2, 0) = sqrt3 * r1(1, 0) * r1(-1, 0)
3398 r(-2, -1) = r1(1, -1)*r1(-1, 0) + r1(1, 0)*r1(-1, -1)
3399 r(-2, -2) = r1(1, 1)*r1(-1, -1) + r1(1, -1)*r1(-1, 1)
3400 do m = -2, 2
3401 dst(7+m) = beta*dst(7+m) + alpha*dot_product(r(-2:2, m), src(5:9))
3402 end do
3403 ! l > 2
3404 do l = 3, p
3405 ! Swap previous and current rotation matrices
3406 r_swap => r_prev
3407 r_prev => r
3408 r => r_swap
3409 ! Prepare scalar factors
3410 scal_uvw_m(0) = dble(l)
3411 do m = 1, l-1
3412 u = sqrt(dble(l*l-m*m))
3413 scal_uvw_m(m) = u
3414 scal_uvw_m(-m) = u
3415 end do
3416 u = two * dble(l)
3417 u = sqrt(dble(u*(u-one)))
3418 scal_uvw_m(l) = u
3419 scal_uvw_m(-l) = u
3420 scal_u_n(0) = dble(l)
3421 scal_v_n(0) = -sqrt(dble(l*(l-1))) / sqrt2
3422 scal_w_n(0) = zero
3423 do n = 1, l-2
3424 u = sqrt(dble(l*l-n*n))
3425 scal_u_n(n) = u
3426 scal_u_n(-n) = u
3427 v = dble(l+n)
3428 v = sqrt(v*(v-one)) / two
3429 scal_v_n(n) = v
3430 scal_v_n(-n) = v
3431 w = dble(l-n)
3432 w = -sqrt(w*(w-one)) / two
3433 scal_w_n(n) = w
3434 scal_w_n(-n) = w
3435 end do
3436 u = sqrt(dble(2*l-1))
3437 scal_u_n(l-1) = u
3438 scal_u_n(1-l) = u
3439 scal_u_n(l) = zero
3440 scal_u_n(-l) = zero
3441 v = sqrt(dble((2*l-1)*(2*l-2))) / two
3442 scal_v_n(l-1) = v
3443 scal_v_n(1-l) = v
3444 v = sqrt(dble(2*l*(2*l-1))) / two
3445 scal_v_n(l) = v
3446 scal_v_n(-l) = v
3447 scal_w_n(l-1) = zero
3448 scal_w_n(l) = zero
3449 scal_w_n(-l) = zero
3450 scal_w_n(1-l) = zero
3451 ind = l*l + l + 1
3452 ! m = l, n = l
3453 v = r1(1, 1)*r_prev(l-1, l-1) - r1(1, -1)*r_prev(l-1, 1-l) - &
3454 & r1(-1, 1)*r_prev(1-l, l-1) + r1(-1, -1)*r_prev(1-l, 1-l)
3455 r(l, l) = v * scal_v_n(l) / scal_uvw_m(l)
3456 !dst(ind+l) = src(ind+l) * r(l, l)
3457 ! m = l, n = -l
3458 v = r1(1, 1)*r_prev(1-l, l-1) - r1(1, -1)*r_prev(1-l, 1-l) + &
3459 & r1(-1, 1)*r_prev(l-1, l-1) - r1(-1, -1)*r_prev(l-1, 1-l)
3460 r(-l, l) = v * scal_v_n(l) / scal_uvw_m(l)
3461 !dst(ind+l) = dst(ind+l) + src(ind-l)*r(-l, l)
3462 ! m = l, n = l-1
3463 u = r1(0, 1)*r_prev(l-1, l-1) - r1(0, -1)*r_prev(l-1, 1-l)
3464 v = r1(1, 1)*r_prev(l-2, l-1) - r1(1, -1)*r_prev(l-2, 1-l) - &
3465 & r1(-1, 1)*r_prev(2-l, l-1) + r1(-1, -1)*r_prev(2-l, 1-l)
3466 r(l-1, l) = (u*scal_u_n(l-1)+v*scal_v_n(l-1)) / scal_uvw_m(l)
3467 !dst(ind+l) = dst(ind+l) + src(ind+l-1)*r(l-1, l)
3468 ! m = l, n = 1-l
3469 u = r1(0, 1)*r_prev(1-l, l-1) - r1(0, -1)*r_prev(1-l, 1-l)
3470 v = r1(1, 1)*r_prev(2-l, l-1) - r1(1, -1)*r_prev(2-l, 1-l) + &
3471 & r1(-1, 1)*r_prev(l-2, l-1) - r1(-1, -1)*r_prev(l-2, 1-l)
3472 r(1-l, l) = (u*scal_u_n(l-1)+v*scal_v_n(l-1)) / scal_uvw_m(l)
3473 !dst(ind+l) = dst(ind+l) + src(ind+1-l)*r(1-l, l)
3474 ! m = l, n = 1
3475 u = r1(0, 1)*r_prev(1, l-1) - r1(0, -1)*r_prev(1, 1-l)
3476 v = r1(1, 1)*r_prev(0, l-1) - r1(1, -1)*r_prev(0, 1-l)
3477 w = r1(1, 1)*r_prev(2, l-1) - r1(1, -1)*r_prev(2, 1-l) + &
3478 & r1(-1, 1)*r_prev(-2, l-1) - r1(-1, -1)*r_prev(-2, 1-l)
3479 r(1, l) = u*scal_u_n(1) + sqrt2*v*scal_v_n(1) + w*scal_w_n(1)
3480 r(1, l) = r(1, l) / scal_uvw_m(l)
3481 !dst(ind+l) = dst(ind+l) + src(ind+1)*r(1, l)
3482 ! m = l, n = -1
3483 u = r1(0, 1)*r_prev(-1, l-1) - r1(0, -1)*r_prev(-1, 1-l)
3484 v = r1(-1, 1)*r_prev(0, l-1) - r1(-1, -1)*r_prev(0, 1-l)
3485 w = r1(1, 1)*r_prev(-2, l-1) - r1(1, -1)*r_prev(-2, 1-l) - &
3486 & r1(-1, 1)*r_prev(2, l-1) + r1(-1, -1)*r_prev(2, 1-l)
3487 r(-1, l) = u*scal_u_n(1) + sqrt2*v*scal_v_n(1) + w*scal_w_n(1)
3488 r(-1, l) = r(-1, l) / scal_uvw_m(l)
3489 !dst(ind+l) = dst(ind+l) + src(ind-1)*r(-1, l)
3490 ! m = l, n = 0
3491 u = r1(0, 1)*r_prev(0, l-1) - r1(0, -1)*r_prev(0, 1-l)
3492 v = r1(1, 1)*r_prev(1, l-1) - r1(1, -1)*r_prev(1, 1-l) + &
3493 & r1(-1, 1)*r_prev(-1, l-1) - r1(-1, -1)*r_prev(-1, 1-l)
3494 r(0, l) = (u*scal_u_n(0) + v*scal_v_n(0)) / scal_uvw_m(l)
3495 !dst(ind+l) = dst(ind+l) + src(ind)*r(0, l)
3496 ! m = l, n = 2-l..l-2, n != -1,0,1
3497 do n = 2, l-2
3498 u = r1(0, 1)*r_prev(n, l-1) - r1(0, -1)*r_prev(n, 1-l)
3499 v = r1(1, 1)*r_prev(n-1, l-1) - r1(1, -1)*r_prev(n-1, 1-l) - &
3500 & r1(-1, 1)*r_prev(1-n, l-1) + r1(-1, -1)*r_prev(1-n, 1-l)
3501 w = r1(1, 1)*r_prev(n+1, l-1) - r1(1, -1)*r_prev(n+1, 1-l) + &
3502 & r1(-1, 1)*r_prev(-n-1, l-1) - r1(-1, -1)*r_prev(-n-1, 1-l)
3503 r(n, l) = u*scal_u_n(n) + v*scal_v_n(n) + w*scal_w_n(n)
3504 r(n, l) = r(n, l) / scal_uvw_m(l)
3505 !dst(ind+l) = dst(ind+l) + src(ind+n)*r(n, l)
3506 u = r1(0, 1)*r_prev(-n, l-1) - r1(0, -1)*r_prev(-n, 1-l)
3507 v = r1(1, 1)*r_prev(1-n, l-1) - r1(1, -1)*r_prev(1-n, 1-l) + &
3508 & r1(-1, 1)*r_prev(n-1, l-1) - r1(-1, -1)*r_prev(n-1, 1-l)
3509 w = r1(1, 1)*r_prev(-n-1, l-1) - r1(1, -1)*r_prev(-n-1, 1-l) - &
3510 & r1(-1, 1)*r_prev(n+1, l-1) + r1(-1, -1)*r_prev(n+1, 1-l)
3511 r(-n, l) = u*scal_u_n(n) + v*scal_v_n(n) + w*scal_w_n(n)
3512 r(-n, l) = r(-n, l) / scal_uvw_m(l)
3513 !dst(ind+l) = dst(ind+l) + src(ind-n)*r(-n, l)
3514 end do
3515 dst(ind+l) = beta*dst(ind+l) + &
3516 & alpha*dot_product(src(ind-l:ind+l), r(-l:l, l))
3517 ! m = -l, n = l
3518 v = r1(1, 1)*r_prev(l-1, 1-l) + r1(1, -1)*r_prev(l-1, l-1) - &
3519 & r1(-1, 1)*r_prev(1-l, 1-l) - r1(-1, -1)*r_prev(1-l, l-1)
3520 r(l, -l) = v * scal_v_n(l) / scal_uvw_m(l)
3521 !dst(ind-l) = src(ind+l)*r(l, -l)
3522 ! m = -l, n = -l
3523 v = r1(1, 1)*r_prev(1-l, 1-l) + r1(1, -1)*r_prev(1-l, l-1) + &
3524 & r1(-1, 1)*r_prev(l-1, 1-l) + r1(-1, -1)*r_prev(l-1, l-1)
3525 r(-l, -l) = v * scal_v_n(l) / scal_uvw_m(l)
3526 !dst(ind-l) = dst(ind-l) + src(ind-l)*r(-l, -l)
3527 ! m = -l, n = l-1
3528 u = r1(0, 1)*r_prev(l-1, 1-l) + r1(0, -1)*r_prev(l-1, l-1)
3529 v = r1(1, 1)*r_prev(l-2, 1-l) + r1(1, -1)*r_prev(l-2, l-1) - &
3530 & r1(-1, 1)*r_prev(2-l, 1-l) - r1(-1, -1)*r_prev(2-l, l-1)
3531 r(l-1, -l) = (u*scal_u_n(l-1)+v*scal_v_n(l-1)) / scal_uvw_m(l)
3532 !dst(ind-l) = dst(ind-l) + src(ind+l-1)*r(l-1, -l)
3533 ! m = -l, n = 1-l
3534 u = r1(0, 1)*r_prev(1-l, 1-l) + r1(0, -1)*r_prev(1-l, l-1)
3535 v = r1(1, 1)*r_prev(2-l, 1-l) + r1(1, -1)*r_prev(2-l, l-1) + &
3536 & r1(-1, 1)*r_prev(l-2, 1-l) + r1(-1, -1)*r_prev(l-2, l-1)
3537 r(1-l, -l) = (u*scal_u_n(l-1)+v*scal_v_n(l-1)) / scal_uvw_m(l)
3538 !dst(ind-l) = dst(ind-l) + src(ind+1-l)*r(1-l, -l)
3539 ! m = -l, n = 1
3540 u = r1(0, 1)*r_prev(1, 1-l) + r1(0, -1)*r_prev(1, l-1)
3541 v = r1(1, 1)*r_prev(0, 1-l) + r1(1, -1)*r_prev(0, l-1)
3542 w = r1(1, 1)*r_prev(2, 1-l) + r1(1, -1)*r_prev(2, l-1) + &
3543 & r1(-1, 1)*r_prev(-2, 1-l) + r1(-1, -1)*r_prev(-2, l-1)
3544 r(1, -l) = u*scal_u_n(1) + sqrt2*v*scal_v_n(1) + w*scal_w_n(1)
3545 r(1, -l) = r(1, -l) / scal_uvw_m(l)
3546 !dst(ind-l) = dst(ind-l) + src(ind+1)*r(1, -l)
3547 ! m = -l, n = -1
3548 u = r1(0, 1)*r_prev(-1, 1-l) + r1(0, -1)*r_prev(-1, l-1)
3549 v = r1(-1, 1)*r_prev(0, 1-l) + r1(-1, -1)*r_prev(0, l-1)
3550 w = r1(1, 1)*r_prev(-2, 1-l) + r1(1, -1)*r_prev(-2, l-1) - &
3551 & r1(-1, 1)*r_prev(2, 1-l) - r1(-1, -1)*r_prev(2, l-1)
3552 r(-1, -l) = u*scal_u_n(1) + sqrt2*v*scal_v_n(1) + w*scal_w_n(1)
3553 r(-1, -l) = r(-1, -l) / scal_uvw_m(l)
3554 !dst(ind-l) = dst(ind-l) + src(ind-1)*r(-1, -l)
3555 ! m = -l, n = 0
3556 u = r1(0, 1)*r_prev(0, 1-l) + r1(0, -1)*r_prev(0, l-1)
3557 v = r1(1, 1)*r_prev(1, 1-l) + r1(1, -1)*r_prev(1, l-1) + &
3558 & r1(-1, 1)*r_prev(-1, 1-l) + r1(-1, -1)*r_prev(-1, l-1)
3559 r(0, -l) = (u*scal_u_n(0) + v*scal_v_n(0)) / scal_uvw_m(l)
3560 !dst(ind-l) = dst(ind-l) + src(ind)*r(0, -l)
3561 ! m = -l, n = 2-l..l-2, n != -1,0,1
3562 do n = 2, l-2
3563 u = r1(0, 1)*r_prev(n, 1-l) + r1(0, -1)*r_prev(n, l-1)
3564 v = r1(1, 1)*r_prev(n-1, 1-l) + r1(1, -1)*r_prev(n-1, l-1) - &
3565 & r1(-1, 1)*r_prev(1-n, 1-l) - r1(-1, -1)*r_prev(1-n, l-1)
3566 w = r1(1, 1)*r_prev(n+1, 1-l) + r1(1, -1)*r_prev(n+1, l-1) + &
3567 & r1(-1, 1)*r_prev(-n-1, 1-l) + r1(-1, -1)*r_prev(-n-1, l-1)
3568 r(n, -l) = u*scal_u_n(n) + v*scal_v_n(n) + w*scal_w_n(n)
3569 r(n, -l) = r(n, -l) / scal_uvw_m(l)
3570 !dst(ind-l) = dst(ind-l) + src(ind+n)*r(n, -l)
3571 u = r1(0, 1)*r_prev(-n, 1-l) + r1(0, -1)*r_prev(-n, l-1)
3572 v = r1(1, 1)*r_prev(1-n, 1-l) + r1(1, -1)*r_prev(1-n, l-1) + &
3573 & r1(-1, 1)*r_prev(n-1, 1-l) + r1(-1, -1)*r_prev(n-1, l-1)
3574 w = r1(1, 1)*r_prev(-n-1, 1-l) + r1(1, -1)*r_prev(-n-1, l-1) - &
3575 & r1(-1, 1)*r_prev(n+1, 1-l) - r1(-1, -1)*r_prev(n+1, l-1)
3576 r(-n, -l) = u*scal_u_n(n) + v*scal_v_n(n) + w*scal_w_n(n)
3577 r(-n, -l) = r(-n, -l) / scal_uvw_m(l)
3578 !dst(ind-l) = dst(ind-l) + src(ind-n)*r(-n, -l)
3579 end do
3580 dst(ind-l) = beta*dst(ind-l) + &
3581 & alpha*dot_product(src(ind-l:ind+l), r(-l:l, -l))
3582 ! Now deal with m=1-l..l-1
3583 do m = 1-l, l-1
3584 ! n = l
3585 v = r1(1, 0)*r_prev(l-1, m) - r1(-1, 0)*r_prev(1-l, m)
3586 r(l, m) = v * scal_v_n(l) / scal_uvw_m(m)
3587 !dst(ind+m) = src(ind+l) * r(l, m)
3588 ! n = -l
3589 v = r1(1, 0)*r_prev(1-l, m) + r1(-1, 0)*r_prev(l-1, m)
3590 r(-l, m) = v * scal_v_n(l) / scal_uvw_m(m)
3591 !dst(ind+m) = dst(ind+m) + src(ind-l)*r(-l, m)
3592 ! n = l-1
3593 u = r1(0, 0) * r_prev(l-1, m)
3594 v = r1(1, 0)*r_prev(l-2, m) - r1(-1, 0)*r_prev(2-l, m)
3595 r(l-1, m) = u*scal_u_n(l-1) + v*scal_v_n(l-1)
3596 r(l-1, m) = r(l-1, m) / scal_uvw_m(m)
3597 !dst(ind+m) = dst(ind+m) + src(ind+l-1)*r(l-1, m)
3598 ! n = 1-l
3599 u = r1(0, 0) * r_prev(1-l, m)
3600 v = r1(1, 0)*r_prev(2-l, m) + r1(-1, 0)*r_prev(l-2, m)
3601 r(1-l, m) = u*scal_u_n(l-1) + v*scal_v_n(l-1)
3602 r(1-l, m) = r(1-l, m) / scal_uvw_m(m)
3603 !dst(ind+m) = dst(ind+m) + src(ind+1-l)*r(1-l, m)
3604 ! n = 0
3605 u = r1(0, 0) * r_prev(0, m)
3606 v = r1(1, 0)*r_prev(1, m) + r1(-1, 0)*r_prev(-1, m)
3607 r(0, m) = u*scal_u_n(0) + v*scal_v_n(0)
3608 r(0, m) = r(0, m) / scal_uvw_m(m)
3609 !dst(ind+m) = dst(ind+m) + src(ind)*r(0, m)
3610 ! n = 1
3611 u = r1(0, 0) * r_prev(1, m)
3612 v = r1(1, 0) * r_prev(0, m)
3613 w = r1(1, 0)*r_prev(2, m) + r1(-1, 0)*r_prev(-2, m)
3614 r(1, m) = u*scal_u_n(1) + sqrt2*v*scal_v_n(1) + w*scal_w_n(1)
3615 r(1, m) = r(1, m) / scal_uvw_m(m)
3616 !dst(ind+m) = dst(ind+m) + src(ind+1)*r(1, m)
3617 ! n = -1
3618 u = r1(0, 0) * r_prev(-1, m)
3619 v = r1(-1, 0) * r_prev(0, m)
3620 w = r1(1, 0)*r_prev(-2, m) - r1(-1, 0)*r_prev(2, m)
3621 r(-1, m) = u*scal_u_n(1) + sqrt2*v*scal_v_n(1) + w*scal_w_n(1)
3622 r(-1, m) = r(-1, m) / scal_uvw_m(m)
3623 !dst(ind+m) = dst(ind+m) + src(ind-1)*r(-1, m)
3624 ! n = 2-l..l-2, n != -1,0,1
3625 do n = 2, l-2
3626 u = r1(0, 0) * r_prev(n, m)
3627 v = r1(1, 0)*r_prev(n-1, m) - r1(-1, 0)*r_prev(1-n, m)
3628 w = r1(1, 0)*r_prev(n+1, m) + r1(-1, 0)*r_prev(-1-n, m)
3629 r(n, m) = u*scal_u_n(n) + v*scal_v_n(n) + w*scal_w_n(n)
3630 r(n, m) = r(n, m) / scal_uvw_m(m)
3631 !dst(ind+m) = dst(ind+m) + src(ind+n)*r(n, m)
3632 u = r1(0, 0) * r_prev(-n, m)
3633 v = r1(1, 0)*r_prev(1-n, m) + r1(-1, 0)*r_prev(n-1, m)
3634 w = r1(1, 0)*r_prev(-n-1, m) - r1(-1, 0)*r_prev(n+1, m)
3635 r(-n, m) = u*scal_u_n(n) + v*scal_v_n(n) + w*scal_w_n(n)
3636 r(-n, m) = r(-n, m) / scal_uvw_m(m)
3637 !dst(ind+m) = dst(ind+m) + src(ind-n)*r(-n, m)
3638 end do
3639 dst(ind+m) = beta*dst(ind+m) + &
3640 & alpha*dot_product(src(ind-l:ind+l), r(-l:l, m))
3641 end do
3642 end do
3643 end if
3644end subroutine fmm_sph_transform_work
3645
3667subroutine fmm_sph_rotate_oz(p, vcos, vsin, alpha, src, beta, dst)
3668 ! Inputs
3669 integer, intent(in) :: p
3670 real(dp), intent(in) :: vcos(p+1), vsin(p+1), alpha, src((p+1)*(p+1)), beta
3671 ! Output
3672 real(dp), intent(inout) :: dst((p+1)*(p+1))
3673 ! Call corresponding work routine
3674 call fmm_sph_rotate_oz_work(p, vcos, vsin, alpha, src, beta, dst)
3675end subroutine fmm_sph_rotate_oz
3676
3698subroutine fmm_sph_rotate_oz_work(p, vcos, vsin, alpha, src, beta, dst)
3699 ! Inputs
3700 integer, intent(in) :: p
3701 real(dp), intent(in) :: vcos(p+1), vsin(p+1), alpha, src((p+1)*(p+1)), beta
3702 ! Output
3703 real(dp), intent(inout) :: dst((p+1)*(p+1))
3704 ! Local variables
3705 integer :: l, m, ind
3706 real(dp) :: v1, v2, v3, v4
3707 ! In case alpha is zero just scale output
3708 if (alpha .eq. zero) then
3709 ! Set output to zero if beta is also zero
3710 if (beta .eq. zero) then
3711 dst = zero
3712 else
3713 dst = beta * dst
3714 end if
3715 ! Exit subroutine
3716 return
3717 end if
3718 ! Now alpha is non-zero
3719 ! In case beta is zero output is just overwritten without being read
3720 if (beta .eq. zero) then
3721 ! l = 0
3722 dst(1) = alpha*src(1)
3723 ! l > 0
3724 !!GCC$ unroll 4
3725 do l = 1, p
3726 ind = l*l + l + 1
3727 ! m = 0
3728 dst(ind) = alpha*src(ind)
3729 ! m != 0
3730 !!GCC$ unroll 4
3731 do m = 1, l
3732 v1 = src(ind+m)
3733 v2 = src(ind-m)
3734 v3 = vcos(1+m)
3735 v4 = vsin(1+m)
3736 ! m > 0
3737 dst(ind+m) = alpha * (v1*v3-v2*v4)
3738 ! m < 0
3739 dst(ind-m) = alpha * (v1*v4+v2*v3)
3740 end do
3741 end do
3742 else
3743 ! l = 0
3744 dst(1) = beta*dst(1) + alpha*src(1)
3745 ! l > 0
3746 !!GCC$ unroll 4
3747 do l = 1, p
3748 ind = l*l + l + 1
3749 ! m = 0
3750 dst(ind) = beta*dst(ind) + alpha*src(ind)
3751 ! m != 0
3752 !!GCC$ unroll 4
3753 do m = 1, l
3754 v1 = src(ind+m)
3755 v2 = src(ind-m)
3756 v3 = vcos(1+m)
3757 v4 = vsin(1+m)
3758 ! m > 0
3759 dst(ind+m) = beta*dst(ind+m) + alpha*(v1*v3-v2*v4)
3760 ! m < 0
3761 dst(ind-m) = beta*dst(ind-m) + alpha*(v1*v4+v2*v3)
3762 end do
3763 end do
3764 end if
3765end subroutine fmm_sph_rotate_oz_work
3766
3788subroutine fmm_sph_rotate_oz_adj(p, vcos, vsin, alpha, src, beta, dst)
3789 ! Inputs
3790 integer, intent(in) :: p
3791 real(dp), intent(in) :: vcos(p+1), vsin(p+1), alpha, src((p+1)*(p+1)), beta
3792 ! Output
3793 real(dp), intent(inout) :: dst((p+1)*(p+1))
3794 ! Call corresponding work routine
3795 call fmm_sph_rotate_oz_adj_work(p, vcos, vsin, alpha, src, beta, dst)
3796end subroutine fmm_sph_rotate_oz_adj
3797
3819subroutine fmm_sph_rotate_oz_adj_work(p, vcos, vsin, alpha, src, beta, dst)
3820 ! Inputs
3821 integer, intent(in) :: p
3822 real(dp), intent(in) :: vcos(p+1), vsin(p+1), alpha, src((p+1)*(p+1)), beta
3823 ! Output
3824 real(dp), intent(inout) :: dst((p+1)*(p+1))
3825 ! Local variables
3826 integer :: l, m, ind
3827 real(dp) :: v1, v2, v3, v4
3828 ! In case alpha is zero just scale output
3829 if (alpha .eq. zero) then
3830 ! Set output to zero if beta is also zero
3831 if (beta .eq. zero) then
3832 dst = zero
3833 else
3834 dst = beta * dst
3835 end if
3836 ! Exit subroutine
3837 return
3838 end if
3839 ! Now alpha is non-zero
3840 ! In case beta is zero output is just overwritten without being read
3841 if (beta .eq. zero) then
3842 ! l = 0
3843 dst(1) = alpha*src(1)
3844 ! l > 0
3845 do l = 1, p
3846 ind = l*l + l + 1
3847 ! m = 0
3848 dst(ind) = alpha*src(ind)
3849 ! m != 0
3850 do m = 1, l
3851 v1 = src(ind+m)
3852 v2 = src(ind-m)
3853 v3 = vcos(1+m)
3854 v4 = vsin(1+m)
3855 ! m > 0
3856 dst(ind+m) = alpha * (v1*v3+v2*v4)
3857 ! m < 0
3858 dst(ind-m) = alpha * (v2*v3-v1*v4)
3859 end do
3860 end do
3861 else
3862 ! l = 0
3863 dst(1) = beta*dst(1) + alpha*src(1)
3864 ! l > 0
3865 do l = 1, p
3866 ind = l*l + l + 1
3867 ! m = 0
3868 dst(ind) = beta*dst(ind) + alpha*src(ind)
3869 ! m != 0
3870 do m = 1, l
3871 v1 = src(ind+m)
3872 v2 = src(ind-m)
3873 v3 = vcos(1+m)
3874 v4 = vsin(1+m)
3875 ! m > 0
3876 dst(ind+m) = beta*dst(ind+m) + alpha*(v1*v3+v2*v4)
3877 ! m < 0
3878 dst(ind-m) = beta*dst(ind-m) + alpha*(v2*v3-v1*v4)
3879 end do
3880 end do
3881 end if
3882end subroutine fmm_sph_rotate_oz_adj_work
3883
3916subroutine fmm_sph_rotate_oxz(p, ctheta, stheta, alpha, src, beta, dst)
3917 ! Inputs
3918 integer, intent(in) :: p
3919 real(dp), intent(in) :: ctheta, stheta, alpha, src((p+1)**2), beta
3920 ! Output
3921 real(dp), intent(inout) :: dst((p+1)**2)
3922 ! Temporary workspace
3923 real(dp) :: work(2*(2*p+1)*(2*p+3)+p)
3924 ! Call corresponding work routine
3925 call fmm_sph_rotate_oxz_work(p, ctheta, stheta, alpha, src, beta, dst, &
3926 & work)
3927end subroutine fmm_sph_rotate_oxz
3928
3941subroutine fmm_sph_rotate_oxz_work(p, ctheta, stheta, alpha, src, beta, dst, &
3942 & work)
3943 ! Inputs
3944 integer, intent(in) :: p
3945 real(dp), intent(in) :: ctheta, stheta, alpha, src((p+1)**2), beta
3946 ! Output
3947 real(dp), intent(out) :: dst((p+1)*(p+1))
3948 ! Temporary workspace
3949 real(dp), intent(out), target :: work(4*p*p+13*p+4)
3950 ! Local variables
3951 real(dp) :: u, v, w, fl, fl2, tmp1, tmp2, vu(2), vv(2), vw(2), &
3952 ctheta2, stheta2, cstheta
3953 integer :: l, m, n, ind
3954 ! Pointers for a workspace
3955 real(dp), pointer :: r(:, :, :), r_prev(:, :, :), scal_uvw_m(:), &
3956 & scal_u_n(:), scal_v_n(:), scal_w_n(:), r_swap(:, :, :), vsqr(:)
3957 !! Spherical harmonics Y_l^m with negative m transform into harmonics Y_l^m
3958 !! with the same l and negative m, while harmonics Y_l^m with non-negative
3959 !! m transform into harmonics Y_l^m with the same l and non-negative m.
3960 !! Transformations for non-negative m will be stored in r(1, :, :) and for
3961 !! negative m will be in r(2, :, :)
3962 ! In case alpha is zero just scale output
3963 if (alpha .eq. zero) then
3964 ! Set output to zero if beta is also zero
3965 if (beta .eq. zero) then
3966 dst = zero
3967 else
3968 dst = beta * dst
3969 end if
3970 ! Exit subroutine
3971 return
3972 end if
3973 ! Now alpha is non-zero
3974 ! In case beta is zero output is just overwritten without being read
3975 if (beta .eq. zero) then
3976 ! Compute rotations/reflections
3977 ! l = 0
3978 dst(1) = alpha * src(1)
3979 if (p .eq. 0) then
3980 return
3981 end if
3982 ! l = 1
3983 dst(2) = alpha * src(2)
3984 dst(3) = alpha * (src(3)*ctheta - src(4)*stheta)
3985 dst(4) = alpha * (src(3)*stheta + src(4)*ctheta)
3986 if (p .eq. 1) then
3987 return
3988 end if
3989 ! Set pointers
3990 l = 2 * (p+1) * (p+1)
3991 r(1:2, 0:p, 0:p) => work(1:l)
3992 m = 2 * l
3993 r_prev(1:2, 0:p, 0:p) => work(l+1:m)
3994 l = m + p + 1
3995 scal_uvw_m(0:p) => work(m+1:l)
3996 m = l + p
3997 scal_u_n(0:p-1) => work(l+1:m)
3998 l = m + p + 1
3999 scal_v_n(0:p) => work(m+1:l)
4000 m = l + p - 2
4001 scal_w_n(1:p-2) => work(l+1:m)
4002 l = m + p
4003 vsqr(1:p) => work(m+1:l)
4004 ! l = 2, m >= 0
4005 ctheta2 = ctheta * ctheta
4006 cstheta = ctheta * stheta
4007 stheta2 = stheta * stheta
4008 r(1, 2, 2) = (ctheta2 + one) / two
4009 r(1, 1, 2) = cstheta
4010 r(1, 0, 2) = sqrt3 / two * stheta2
4011 dst(9) = alpha * (src(9)*r(1, 2, 2) + src(8)*r(1, 1, 2) + &
4012 & src(7)*r(1, 0, 2))
4013 r(1, 2, 1) = -cstheta
4014 r(1, 1, 1) = ctheta2 - stheta2
4015 r(1, 0, 1) = sqrt3 * cstheta
4016 dst(8) = alpha * (src(9)*r(1, 2, 1) + src(8)*r(1, 1, 1) + &
4017 & src(7)*r(1, 0, 1))
4018 r(1, 2, 0) = sqrt3 / two * stheta2
4019 r(1, 1, 0) = -sqrt3 * cstheta
4020 r(1, 0, 0) = (three*ctheta2-one) / two
4021 dst(7) = alpha * (src(9)*r(1, 2, 0) + src(8)*r(1, 1, 0) + &
4022 & src(7)*r(1, 0, 0))
4023 ! l = 2, m < 0
4024 r(2, 1, 1) = ctheta
4025 r(2, 2, 1) = -stheta
4026 dst(6) = alpha * (src(6)*r(2, 1, 1) + src(5)*r(2, 2, 1))
4027 r(2, 1, 2) = stheta
4028 r(2, 2, 2) = ctheta
4029 dst(5) = alpha * (src(6)*r(2, 1, 2) + src(5)*r(2, 2, 2))
4030 ! l > 2
4031 vsqr(1) = one
4032 vsqr(2) = four
4033 do l = 3, p
4034 ! Swap previous and current rotation matrices
4035 r_swap => r_prev
4036 r_prev => r
4037 r => r_swap
4038 ! Prepare scalar factors
4039 fl = dble(l)
4040 fl2 = fl * fl
4041 vsqr(l) = fl2
4042 scal_uvw_m(0) = one / fl
4043 !!GCC$ unroll 4
4044 do m = 1, l-1
4045 u = sqrt(fl2 - vsqr(m))
4046 scal_uvw_m(m) = one / u
4047 end do
4048 u = two * dble(l)
4049 u = sqrt(dble(u*(u-one)))
4050 scal_uvw_m(l) = one / u
4051 scal_u_n(0) = dble(l)
4052 scal_v_n(0) = -sqrt(dble(l*(l-1))) / sqrt2
4053 !!GCC$ unroll 4
4054 do n = 1, l-2
4055 u = sqrt(fl2-vsqr(n))
4056 scal_u_n(n) = u
4057 end do
4058 !!GCC$ unroll 4
4059 do n = 1, l-2
4060 v = dble(l+n)
4061 v = sqrt(v*v-v) / two
4062 scal_v_n(n) = v
4063 w = dble(l-n)
4064 w = -sqrt(w*w-w) / two
4065 scal_w_n(n) = w
4066 end do
4067 u = sqrt(dble(2*l-1))
4068 scal_u_n(l-1) = u
4069 v = sqrt(dble((2*l-1)*(2*l-2))) / two
4070 scal_v_n(l-1) = v
4071 v = sqrt(dble(2*l*(2*l-1))) / two
4072 scal_v_n(l) = v
4073 ind = l*l + l + 1
4074 ! m = l, n = l and m = -l, n = - l
4075 vv = ctheta*r_prev(:, l-1, l-1) + r_prev(2:1:-1, l-1, l-1)
4076 r(:, l, l) = vv * scal_v_n(l) * scal_uvw_m(l)
4077 tmp1 = src(ind+l) * r(1, l, l)
4078 tmp2 = src(ind-l) * r(2, l, l)
4079 ! m = l, n = l-1 and m = -l, n = 1-l
4080 vu = stheta * r_prev(:, l-1, l-1)
4081 vv = ctheta*r_prev(:, l-2, l-1) + r_prev(2:1:-1, l-2, l-1)
4082 r(:, l-1, l) = (vu*scal_u_n(l-1)+vv*scal_v_n(l-1)) * scal_uvw_m(l)
4083 tmp1 = tmp1 + src(ind+l-1)*r(1, l-1, l)
4084 tmp2 = tmp2 + src(ind-l+1)*r(2, l-1, l)
4085 ! m = l, n = 1 and m = -l, n = -1
4086 vu = stheta * r_prev(:, 1, l-1)
4087 vv(1) = ctheta * r_prev(1, 0, l-1)
4088 vv(2) = r_prev(1, 0, l-1)
4089 vw = ctheta*r_prev(:, 2, l-1) - r_prev(2:1:-1, 2, l-1)
4090 r(:, 1, l) = vu*scal_u_n(1) + vw*scal_w_n(1) + sqrt2*scal_v_n(1)*vv
4091 r(:, 1, l) = r(:, 1, l) * scal_uvw_m(l)
4092 tmp1 = tmp1 + src(ind+1)*r(1, 1, l)
4093 tmp2 = tmp2 + src(ind-1)*r(2, 1, l)
4094 ! m = l, n = 0
4095 u = stheta * r_prev(1, 0, l-1)
4096 v = ctheta*r_prev(1, 1, l-1) - r_prev(2, 1, l-1)
4097 r(1, 0, l) = (u*scal_u_n(0) + v*scal_v_n(0)) * scal_uvw_m(l)
4098 tmp1 = tmp1 + src(ind)*r(1, 0, l)
4099 ! m = l, n = 2..l-2 and m = -l, n = 2-l..-2
4100 !!GCC$ unroll 4
4101 do n = 2, l-2
4102 vu = stheta * r_prev(:, n, l-1)
4103 vv = ctheta*r_prev(:, n-1, l-1) + r_prev(2:1:-1, n-1, l-1)
4104 vw = ctheta*r_prev(:, n+1, l-1) - r_prev(2:1:-1, n+1, l-1)
4105 vu = vu*scal_u_n(n) + vv*scal_v_n(n) + vw*scal_w_n(n)
4106 r(:, n, l) = vu * scal_uvw_m(l)
4107 tmp1 = tmp1 + src(ind+n)*r(1, n, l)
4108 tmp2 = tmp2 + src(ind-n)*r(2, n, l)
4109 end do
4110 dst(ind+l) = alpha * tmp1
4111 dst(ind-l) = alpha * tmp2
4112 ! Now deal with m = 0
4113 ! n = l and n = -l
4114 v = -stheta * r_prev(1, l-1, 0)
4115 u = scal_v_n(l) * scal_uvw_m(0)
4116 r(1, l, 0) = v * u
4117 tmp1 = src(ind+l) * r(1, l, 0)
4118 ! n = l-1
4119 u = ctheta * r_prev(1, l-1, 0)
4120 v = -stheta * r_prev(1, l-2, 0)
4121 w = u*scal_u_n(l-1) + v*scal_v_n(l-1)
4122 r(1, l-1, 0) = w * scal_uvw_m(0)
4123 tmp1 = tmp1 + src(ind+l-1)*r(1, l-1, 0)
4124 ! n = 0
4125 u = ctheta * r_prev(1, 0, 0)
4126 v = -stheta * r_prev(1, 1, 0)
4127 w = u*scal_u_n(0) + v*scal_v_n(0)
4128 r(1, 0, 0) = w * scal_uvw_m(0)
4129 tmp1 = tmp1 + src(ind)*r(1, 0, 0)
4130 ! n = 1
4131 v = sqrt2*scal_v_n(1)*r_prev(1, 0, 0) + &
4132 & scal_w_n(1)*r_prev(1, 2, 0)
4133 u = ctheta * r_prev(1, 1, 0)
4134 w = scal_u_n(1)*u - stheta*v
4135 r(1, 1, 0) = w * scal_uvw_m(0)
4136 tmp1 = tmp1 + src(ind+1)*r(1, 1, 0)
4137 ! n = 2..l-2
4138 !!GCC$ unroll 4
4139 do n = 2, l-2
4140 v = scal_v_n(n)*r_prev(1, n-1, 0) + &
4141 & scal_w_n(n)*r_prev(1, n+1, 0)
4142 u = ctheta * r_prev(1, n, 0)
4143 w = scal_u_n(n)*u - stheta*v
4144 r(1, n, 0) = w * scal_uvw_m(0)
4145 tmp1 = tmp1 + src(ind+n)*r(1, n, 0)
4146 end do
4147 dst(ind) = alpha * tmp1
4148 ! Now deal with m=1..l-1 and m=1-l..-1
4149 !!GCC$ unroll 4
4150 do m = 1, l-1
4151 ! n = l and n = -l
4152 vv = -stheta * r_prev(:, l-1, m)
4153 u = scal_v_n(l) * scal_uvw_m(m)
4154 r(:, l, m) = vv * u
4155 tmp1 = src(ind+l) * r(1, l, m)
4156 tmp2 = src(ind-l) * r(2, l, m)
4157 ! n = l-1 and n = 1-l
4158 vu = ctheta * r_prev(:, l-1, m)
4159 vv = -stheta * r_prev(:, l-2, m)
4160 vw = vu*scal_u_n(l-1) + vv*scal_v_n(l-1)
4161 r(:, l-1, m) = vw * scal_uvw_m(m)
4162 tmp1 = tmp1 + src(ind+l-1)*r(1, l-1, m)
4163 tmp2 = tmp2 + src(ind-l+1)*r(2, l-1, m)
4164 ! n = 0
4165 u = ctheta * r_prev(1, 0, m)
4166 v = -stheta * r_prev(1, 1, m)
4167 w = u*scal_u_n(0) + v*scal_v_n(0)
4168 r(1, 0, m) = w * scal_uvw_m(m)
4169 tmp1 = tmp1 + src(ind)*r(1, 0, m)
4170 ! n = 1
4171 v = sqrt2*scal_v_n(1)*r_prev(1, 0, m) + &
4172 & scal_w_n(1)*r_prev(1, 2, m)
4173 u = ctheta * r_prev(1, 1, m)
4174 w = scal_u_n(1)*u - stheta*v
4175 r(1, 1, m) = w * scal_uvw_m(m)
4176 tmp1 = tmp1 + src(ind+1)*r(1, 1, m)
4177 ! n = -1
4178 u = ctheta * r_prev(2, 1, m)
4179 w = -stheta * r_prev(2, 2, m)
4180 v = u*scal_u_n(1) + w*scal_w_n(1)
4181 r(2, 1, m) = v * scal_uvw_m(m)
4182 tmp2 = tmp2 + src(ind-1)*r(2, 1, m)
4183 ! n = 2..l-2 and n = 2-l..-2
4184 !!GCC$ unroll 4
4185 do n = 2, l-2
4186 vv = scal_v_n(n)*r_prev(:, n-1, m) + &
4187 & scal_w_n(n)*r_prev(:, n+1, m)
4188 vu = ctheta * r_prev(:, n, m)
4189 vw = scal_u_n(n)*vu - stheta*vv
4190 r(:, n, m) = vw * scal_uvw_m(m)
4191 tmp1 = tmp1 + src(ind+n)*r(1, n, m)
4192 tmp2 = tmp2 + src(ind-n)*r(2, n, m)
4193 end do
4194 dst(ind+m) = alpha * tmp1
4195 dst(ind-m) = alpha * tmp2
4196 end do
4197 end do
4198 else
4199 stop "Not Implemented"
4200 end if
4201end subroutine fmm_sph_rotate_oxz_work
4202
4225subroutine fmm_m2m_ztranslate(z, src_r, dst_r, p, vscales, vcnk, alpha, &
4226 & src_m, beta, dst_m)
4227 ! Inputs
4228 integer, intent(in) :: p
4229 real(dp), intent(in) :: z, src_r, dst_r, vscales((p+1)*(p+1)), &
4230 & vcnk((2*p+1)*(p+1)), alpha, src_m((p+1)*(p+1)), beta
4231 ! Output
4232 real(dp), intent(inout) :: dst_m((p+1)*(p+1))
4233 ! Temporary workspace
4234 real(dp) :: work(2*(p+1))
4235 ! Call corresponding work routine
4236 call fmm_m2m_ztranslate_work(z, src_r, dst_r, p, vscales, vcnk, alpha, &
4237 & src_m, beta, dst_m, work)
4238end subroutine fmm_m2m_ztranslate
4239
4263subroutine fmm_m2m_ztranslate_work(z, src_r, dst_r, p, vscales, vcnk, alpha, &
4264 & src_m, beta, dst_m, work)
4265 ! Inputs
4266 integer, intent(in) :: p
4267 real(dp), intent(in) :: z, src_r, dst_r, vscales((p+1)*(p+1)), &
4268 & vcnk((2*p+1)*(p+1)), alpha, src_m((p+1)*(p+1)), beta
4269 ! Output
4270 real(dp), intent(inout) :: dst_m((p+1)*(p+1))
4271 ! Temporary workspace
4272 real(dp), intent(out), target :: work(2*(p+1))
4273 ! Local variables
4274 real(dp) :: r1, r2, tmp1, tmp2, tmp3, res1, res2, pow_r1
4275 integer :: j, k, n, indj, indjn, indjk1, indjk2
4276 ! Pointers for temporary values of powers
4277 real(dp), pointer :: pow_r2(:)
4278 ! In case alpha is zero just do a proper scaling of output
4279 if (alpha .eq. zero) then
4280 if (beta .eq. zero) then
4281 dst_m = zero
4282 else
4283 dst_m = beta * dst_m
4284 end if
4285 return
4286 end if
4287 ! Now alpha is non-zero
4288 ! If harmonics have different centers
4289 if (z .ne. 0) then
4290 ! Prepare pointers
4291 pow_r2(1:p+1) => work(1:p+1)
4292 ! Get ratios r1 and r2
4293 r1 = src_r / dst_r
4294 r2 = z / dst_r
4295 ! Get powers of ratio r2/r1
4296 r2 = r2 / r1
4297 pow_r1 = r1
4298 pow_r2(1) = one
4299 do j = 2, p+1
4300 pow_r2(j) = pow_r2(j-1) * r2
4301 end do
4302 ! Do actual M2M
4303 ! Overwrite output if beta is zero
4304 if (beta .eq. zero) then
4305 do j = 0, p
4306 ! Offset for dst_m
4307 indj = j*j + j + 1
4308 ! k = 0
4309 tmp1 = alpha * pow_r1 * vscales(indj)
4310 pow_r1 = pow_r1 * r1
4311 tmp2 = tmp1
4312 res1 = zero
4313 ! Offset for vcnk
4314 indjk1 = j*(j+1)/2 + 1
4315 do n = 0, j
4316 ! Offset for src_m
4317 indjn = (j-n)**2 + (j-n) + 1
4318 tmp3 = pow_r2(n+1) / &
4319 & vscales(indjn) * vcnk(indjk1+n)**2
4320 res1 = res1 + tmp3*src_m(indjn)
4321 end do
4322 dst_m(indj) = tmp2 * res1
4323 ! k != 0
4324 do k = 1, j
4325 tmp2 = tmp1
4326 res1 = zero
4327 res2 = zero
4328 ! Offsets for vcnk
4329 indjk1 = (j-k)*(j-k+1)/2 + 1
4330 indjk2 = (j+k)*(j+k+1)/2 + 1
4331 do n = 0, j-k
4332 ! Offset for src_m
4333 indjn = (j-n)**2 + (j-n) + 1
4334 tmp3 = pow_r2(n+1) / &
4335 & vscales(indjn) * vcnk(indjk1+n) * &
4336 & vcnk(indjk2+n)
4337 res1 = res1 + tmp3*src_m(indjn+k)
4338 res2 = res2 + tmp3*src_m(indjn-k)
4339 end do
4340 dst_m(indj+k) = tmp2 * res1
4341 dst_m(indj-k) = tmp2 * res2
4342 end do
4343 end do
4344 ! Update output if beta is non-zero
4345 else
4346 do j = 0, p
4347 ! Offset for dst_m
4348 indj = j*j + j + 1
4349 ! k = 0
4350 tmp1 = alpha * pow_r1 * vscales(indj)
4351 pow_r1 = pow_r1 * r1
4352 tmp2 = tmp1
4353 res1 = zero
4354 ! Offset for vcnk
4355 indjk1 = j * (j+1) /2 + 1
4356 do n = 0, j
4357 ! Offset for src_m
4358 indjn = (j-n)**2 + (j-n) + 1
4359 tmp3 = pow_r2(n+1) / &
4360 & vscales(indjn) * vcnk(indjk1+n)**2
4361 res1 = res1 + tmp3*src_m(indjn)
4362 end do
4363 dst_m(indj) = beta*dst_m(indj) + tmp2*res1
4364 ! k != 0
4365 do k = 1, j
4366 tmp2 = tmp1
4367 res1 = zero
4368 res2 = zero
4369 ! Offsets for vcnk
4370 indjk1 = (j-k)*(j-k+1)/2 + 1
4371 indjk2 = (j+k)*(j+k+1)/2 + 1
4372 do n = 0, j-k
4373 ! Offset for src_m
4374 indjn = (j-n)**2 + (j-n) + 1
4375 tmp3 = pow_r2(n+1) / &
4376 & vscales(indjn) * vcnk(indjk1+n) * &
4377 & vcnk(indjk2+n)
4378 res1 = res1 + tmp3*src_m(indjn+k)
4379 res2 = res2 + tmp3*src_m(indjn-k)
4380 end do
4381 dst_m(indj+k) = beta*dst_m(indj+k) + tmp2*res1
4382 dst_m(indj-k) = beta*dst_m(indj-k) + tmp2*res2
4383 end do
4384 end do
4385 end if
4386 ! If harmonics are located at the same point
4387 else
4388 ! Overwrite output if beta is zero
4389 if (beta .eq. zero) then
4390 r1 = src_r / dst_r
4391 tmp1 = alpha * r1
4392 do j = 0, p
4393 indj = j*j + j + 1
4394 do k = indj-j, indj+j
4395 dst_m(k) = tmp1 * src_m(k)
4396 end do
4397 tmp1 = tmp1 * r1
4398 end do
4399 ! Update output if beta is non-zero
4400 else
4401 r1 = src_r / dst_r
4402 tmp1 = alpha * r1
4403 do j = 0, p
4404 indj = j*j + j + 1
4405 do k = indj-j, indj+j
4406 dst_m(k) = beta*dst_m(k) + tmp1*src_m(k)
4407 end do
4408 tmp1 = tmp1 * r1
4409 end do
4410 end if
4411 end if
4412end subroutine fmm_m2m_ztranslate_work
4413
4435subroutine fmm_m2m_bessel_ztranslate(z, src_sk, dst_sk, p, vscales, alpha, &
4436 & src_m, beta, dst_m)
4437 ! Inputs
4438 integer, intent(in) :: p
4439 real(dp), intent(in) :: z, src_sk(p+1), dst_sk(p+1), vscales((p+1)*(p+1)), &
4440 & alpha, src_m((p+1)*(p+1)), beta
4441 ! Output
4442 real(dp), intent(inout) :: dst_m((p+1)*(p+1))
4443 ! Temporary workspace
4444 real(dp) :: work(2*p+1)
4445 complex(dp) :: work_complex(2*p+1)
4446 ! Call corresponding work routine
4447 call fmm_m2m_bessel_ztranslate_work(z, src_sk, dst_sk, p, vscales, &
4448 & alpha, src_m, beta, dst_m, work, work_complex)
4449end subroutine fmm_m2m_bessel_ztranslate
4450
4474subroutine fmm_m2m_bessel_ztranslate_work(z, src_sk, dst_sk, p, vscales, &
4475 & alpha, src_m, beta, dst_m, work, work_complex)
4476 use complex_bessel
4477 ! Inputs
4478 integer, intent(in) :: p
4479 real(dp), intent(in) :: z, src_sk(p+1), dst_sk(p+1), vscales((p+1)*(p+1)), &
4480 & alpha, src_m((p+1)*(p+1)), beta
4481 ! Output
4482 real(dp), intent(inout) :: dst_m((p+1)*(p+1))
4483 ! Temporary workspace
4484 real(dp), intent(out), target :: work(2*p+1)
4485 complex(dp), intent(out) :: work_complex(2*p+1)
4486 ! Local variables
4487 real(dp) :: r1, tmp1, tmp2, tmp3, res1, res2
4488 integer :: j, k, n, indj, indn, l, NZ, ierr
4489 ! Pointers for temporary values of powers
4490 real(dp) :: fact(2*p+1)
4491 complex(dp) :: complex_argument
4492 ! In case alpha is zero just do a proper scaling of output
4493 if (alpha .eq. zero) then
4494 if (beta .eq. zero) then
4495 dst_m = zero
4496 else
4497 dst_m = beta * dst_m
4498 end if
4499 return
4500 end if
4501 ! Get factorials
4502 fact(1) = one
4503 do j = 1, 2*p
4504 fact(j+1) = dble(j) * fact(j)
4505 end do
4506 ! Now alpha is non-zero
4507 ! If harmonics have different centers
4508 if (z .ne. 0) then
4509 complex_argument = z
4510 call cbesi(complex_argument, pt5, 1, 1, work_complex(1), nz, ierr)
4511 work(1) = sqrt(pi/(2*z)) * real(work_complex(1))
4512 if (p .gt. 0) then
4513 call cbesi(complex_argument, 1.5d0, 1, 2*p, &
4514 & work_complex(2:2*p+1), nz, ierr)
4515 work(2:2*p+1) = sqrt(pi/(2*z)) * real(work_complex(2:2*p+1))
4516 end if
4517 ! Do actual M2M
4518 ! Overwrite output if beta is zero
4519 if (beta .eq. zero) then
4520 dst_m = zero
4521 do j = 0, p
4522 ! Offset for dst_m
4523 indj = j*j + j + 1
4524 ! k = 0
4525 k = 0
4526 res1 = zero
4527 do n = 0, p
4528 ! Offset for src_m
4529 indn = n*n + n + 1
4530 tmp1 = vscales(indn) * &
4531 & dble(2*j+1) * fact(j+1) * fact(n+1) / &
4532 & src_sk(n+1) / vscales(indj) * dst_sk(j+1)
4533 tmp3 = zero
4534 do l = 0, min(j, n)
4535 tmp2 = (two**(-l)) / &
4536 & fact(l+1)*fact(2*l+1)/ &
4537 & fact(l+1)/fact(l+1)/fact(j-l+1)/fact(n-l+1)* &
4538 & (work(j+n-l+1) / (z**l))
4539 tmp3 = tmp3 + tmp2
4540 end do
4541 tmp3 = tmp3 * tmp1
4542 res1 = res1 + tmp3*src_m(indn)
4543 end do
4544 dst_m(indj) = res1
4545 ! k != 0
4546 do k = 1, j
4547 res1 = zero
4548 res2 = zero
4549 do n = k, p
4550 ! Offset for src_m
4551 indn = n*n + n + 1
4552 tmp1 = vscales(indn+k) * &
4553 & dble(2*j+1) * fact(j-k+1) * fact(n+k+1) / &
4554 & src_sk(n+1) / vscales(indj+k) * dst_sk(j+1)
4555 tmp3 = zero
4556 do l = k, min(j, n)
4557 tmp2 = (two**(-l)) / &
4558 & fact(l+k+1)*fact(2*l+1)/ &
4559 & fact(l+1)/fact(l-k+1)/fact(j-l+1)/fact(n-l+1)* &
4560 & (work(j+n-l+1) / (z**l))
4561 tmp3 = tmp3 + tmp2
4562 end do
4563 tmp3 = tmp3 * tmp1
4564 res1 = res1 + tmp3*src_m(indn+k)
4565 res2 = res2 + tmp3*src_m(indn-k)
4566 end do
4567 dst_m(indj+k) = res1
4568 dst_m(indj-k) = res2
4569 end do
4570 end do
4571 ! Update output if beta is non-zero
4572 else
4573 do j = 0, p
4574 ! Offset for dst_m
4575 indj = j*j + j + 1
4576 ! k = 0
4577 k = 0
4578 res1 = zero
4579 do n = 0, p
4580 ! Offset for src_m
4581 indn = n*n + n + 1
4582 tmp1 = vscales(indn) * &
4583 & dble(2*j+1) * fact(j+1) * fact(n+1) / &
4584 & src_sk(n+1) / vscales(indj) * dst_sk(j+1)
4585 tmp3 = zero
4586 do l = 0, min(j, n)
4587 tmp2 = (two**(-l)) / &
4588 & fact(l+1)*fact(2*l+1)/ &
4589 & fact(l+1)/fact(l+1)/fact(j-l+1)/fact(n-l+1)* &
4590 & (work(j+n-l+1) / (z**l))
4591 tmp3 = tmp3 + tmp2
4592 end do
4593 tmp3 = tmp3 * tmp1
4594 res1 = res1 + tmp3*src_m(indn)
4595 end do
4596 dst_m(indj) = beta*dst_m(indj) + res1
4597 ! k != 0
4598 do k = 1, j
4599 res1 = zero
4600 res2 = zero
4601 do n = k, p
4602 ! Offset for src_m
4603 indn = n*n + n + 1
4604 tmp1 = vscales(indn+k) * &
4605 & dble(2*j+1) * fact(j-k+1) * fact(n+k+1) / &
4606 & src_sk(n+1) / vscales(indj+k) * dst_sk(j+1)
4607 tmp3 = zero
4608 do l = k, min(j, n)
4609 tmp2 = (two**(-l)) / &
4610 & fact(l+k+1)*fact(2*l+1)/ &
4611 & fact(l+1)/fact(l-k+1)/fact(j-l+1)/fact(n-l+1)* &
4612 & (work(j+n-l+1) / (z**l))
4613 tmp3 = tmp3 + tmp2
4614 end do
4615 tmp3 = tmp3 * tmp1
4616 res1 = res1 + tmp3*src_m(indn+k)
4617 res2 = res2 + tmp3*src_m(indn-k)
4618 end do
4619 dst_m(indj+k) = beta*dst_m(indj+k) + res1
4620 dst_m(indj-k) = beta*dst_m(indj-k) + res2
4621 end do
4622 end do
4623 end if
4624 ! If harmonics are located at the same point
4625 else
4626 r1 = zero
4627 ! Overwrite output if beta is zero
4628 if (beta .eq. zero) then
4629 !r1 = src_r / dst_r
4630 tmp1 = alpha * r1
4631 do j = 0, p
4632 indj = j*j + j + 1
4633 do k = indj-j, indj+j
4634 dst_m(k) = tmp1 * src_m(k)
4635 end do
4636 tmp1 = tmp1 * r1
4637 end do
4638 ! Update output if beta is non-zero
4639 else
4640 !r1 = src_r / dst_r
4641 tmp1 = alpha * r1
4642 do j = 0, p
4643 indj = j*j + j + 1
4644 do k = indj-j, indj+j
4645 dst_m(k) = beta*dst_m(k) + tmp1*src_m(k)
4646 end do
4647 tmp1 = tmp1 * r1
4648 end do
4649 end if
4650 end if
4651end subroutine fmm_m2m_bessel_ztranslate_work
4652
4674subroutine fmm_m2m_bessel_ztranslate_adj(z, src_sk, dst_sk, p, vscales, alpha, &
4675 & src_m, beta, dst_m)
4676 ! Inputs
4677 integer, intent(in) :: p
4678 real(dp), intent(in) :: z, src_sk(p+1), dst_sk(p+1), vscales((p+1)*(p+1)), &
4679 & alpha, src_m((p+1)*(p+1)), beta
4680 ! Output
4681 real(dp), intent(inout) :: dst_m((p+1)*(p+1))
4682 ! Temporary workspace
4683 real(dp) :: work(2*p+1)
4684 complex(dp) :: work_complex(2*p+1)
4685 ! Call corresponding work routine
4686 call fmm_m2m_bessel_ztranslate_adj_work(z, src_sk, dst_sk, p, vscales, &
4687 & alpha, src_m, beta, dst_m, work, work_complex)
4688end subroutine fmm_m2m_bessel_ztranslate_adj
4689
4713subroutine fmm_m2m_bessel_ztranslate_adj_work(z, src_sk, dst_sk, p, vscales, &
4714 & alpha, src_m, beta, dst_m, work, work_complex)
4715 use complex_bessel
4716 ! Inputs
4717 integer, intent(in) :: p
4718 real(dp), intent(in) :: z, src_sk(p+1), dst_sk(p+1), vscales((p+1)*(p+1)), &
4719 & alpha, src_m((p+1)*(p+1)), beta
4720 ! Output
4721 real(dp), intent(inout) :: dst_m((p+1)*(p+1))
4722 ! Temporary workspace
4723 real(dp), intent(out), target :: work(2*p+1)
4724 complex(dp), intent(out) :: work_complex(2*p+1)
4725 ! Local variables
4726 real(dp) :: r1, tmp1, tmp2, tmp3, res1, res2
4727 integer :: j, k, n, indj, indn, l, NZ, ierr
4728 ! Pointers for temporary values of powers
4729 real(dp) :: fact(2*p+1)
4730 complex(dp) :: complex_argument
4731 ! In case alpha is zero just do a proper scaling of output
4732 if (alpha .eq. zero) then
4733 if (beta .eq. zero) then
4734 dst_m = zero
4735 else
4736 dst_m = beta * dst_m
4737 end if
4738 return
4739 end if
4740 ! Get factorials
4741 fact(1) = one
4742 do j = 1, 2*p
4743 fact(j+1) = dble(j) * fact(j)
4744 end do
4745 ! Now alpha is non-zero
4746 ! If harmonics have different centers
4747 if (z .ne. 0) then
4748 complex_argument = z
4749 call cbesi(complex_argument, pt5, 1, 1, work_complex(1), nz, ierr)
4750 work(1) = sqrt(pi/(2*z)) * real(work_complex(1))
4751 if (p .gt. 0) then
4752 call cbesi(complex_argument, 1.5d0, 1, 2*p, &
4753 & work_complex(2:2*p+1), nz, ierr)
4754 work(2:2*p+1) = sqrt(pi/(2*z)) * real(work_complex(2:2*p+1))
4755 end if
4756 ! Do actual M2M
4757 ! Overwrite output if beta is zero
4758 if (beta .eq. zero) then
4759 dst_m = zero
4760 do n = 0, p
4761 ! Offset for dst_m
4762 indn = n*n + n + 1
4763 ! k = 0
4764 k = 0
4765 res1 = zero
4766 do j = 0, p
4767 ! Offset for src_m
4768 indj = j*j + j + 1
4769 tmp1 = vscales(indn) * &
4770 & dble(2*j+1) * fact(j+1) * fact(n+1) / &
4771 & src_sk(n+1) / vscales(indj) * dst_sk(j+1)
4772 tmp3 = zero
4773 do l = 0, min(j, n)
4774 tmp2 = (two**(-l)) / &
4775 & fact(l+1)*fact(2*l+1)/ &
4776 & fact(l+1)/fact(l+1)/fact(j-l+1)/fact(n-l+1)* &
4777 & (work(j+n-l+1) / (z**l))
4778 tmp3 = tmp3 + tmp2
4779 end do
4780 tmp3 = tmp3 * tmp1
4781 res1 = res1 + tmp3*src_m(indj)
4782 end do
4783 dst_m(indn) = res1
4784 ! k != 0
4785 do k = 1, n
4786 res1 = zero
4787 res2 = zero
4788 do j = k, p
4789 ! Offset for src_m
4790 indj = j*j + j + 1
4791 tmp1 = vscales(indn+k) * &
4792 & dble(2*j+1) * fact(j-k+1) * fact(n+k+1) / &
4793 & src_sk(n+1) / vscales(indj+k) * dst_sk(j+1)
4794 tmp3 = zero
4795 do l = k, min(j, n)
4796 tmp2 = (two**(-l)) / &
4797 & fact(l+k+1)*fact(2*l+1)/ &
4798 & fact(l+1)/fact(l-k+1)/fact(j-l+1)/fact(n-l+1)* &
4799 & (work(j+n-l+1) / (z**l))
4800 tmp3 = tmp3 + tmp2
4801 end do
4802 tmp3 = tmp3 * tmp1
4803 res1 = res1 + tmp3*src_m(indj+k)
4804 res2 = res2 + tmp3*src_m(indj-k)
4805 end do
4806 dst_m(indn+k) = res1
4807 dst_m(indn-k) = res2
4808 end do
4809 end do
4810 ! Update output if beta is non-zero
4811 else
4812 do n = 0, p
4813 ! Offset for dst_m
4814 indn = n*n + n + 1
4815 ! k = 0
4816 k = 0
4817 res1 = zero
4818 do j = 0, p
4819 ! Offset for src_m
4820 indj = j*j + j + 1
4821 tmp1 = vscales(indn) * &
4822 & dble(2*j+1) * fact(j+1) * fact(n+1) / &
4823 & src_sk(n+1) / vscales(indj) * dst_sk(j+1)
4824 tmp3 = zero
4825 do l = 0, min(j, n)
4826 tmp2 = (two**(-l)) / &
4827 & fact(l+1)*fact(2*l+1)/ &
4828 & fact(l+1)/fact(l+1)/fact(j-l+1)/fact(n-l+1)* &
4829 & (work(j+n-l+1) / (z**l))
4830 tmp3 = tmp3 + tmp2
4831 end do
4832 tmp3 = tmp3 * tmp1
4833 res1 = res1 + tmp3*src_m(indj)
4834 end do
4835 dst_m(indn) = beta*dst_m(indn) + res1
4836 ! k != 0
4837 do k = 1, n
4838 res1 = zero
4839 res2 = zero
4840 do j = k, p
4841 ! Offset for src_m
4842 indj = j*j + j + 1
4843 tmp1 = vscales(indn+k) * &
4844 & dble(2*j+1) * fact(j-k+1) * fact(n+k+1) / &
4845 & src_sk(n+1) / vscales(indj+k) * dst_sk(j+1)
4846 tmp3 = zero
4847 do l = k, min(j, n)
4848 tmp2 = (two**(-l)) / &
4849 & fact(l+k+1)*fact(2*l+1)/ &
4850 & fact(l+1)/fact(l-k+1)/fact(j-l+1)/fact(n-l+1)* &
4851 & (work(j+n-l+1) / (z**l))
4852 tmp3 = tmp3 + tmp2
4853 end do
4854 tmp3 = tmp3 * tmp1
4855 res1 = res1 + tmp3*src_m(indj+k)
4856 res2 = res2 + tmp3*src_m(indj-k)
4857 end do
4858 dst_m(indn+k) = beta*dst_m(indn+k) + res1
4859 dst_m(indn-k) = beta*dst_m(indn-k) + res2
4860 end do
4861 end do
4862 end if
4863 ! If harmonics are located at the same point
4864 else
4865 r1 = zero
4866 ! Overwrite output if beta is zero
4867 if (beta .eq. zero) then
4868 !r1 = src_r / dst_r
4869 tmp1 = alpha * r1
4870 do j = 0, p
4871 indj = j*j + j + 1
4872 do k = indj-j, indj+j
4873 dst_m(k) = tmp1 * src_m(k)
4874 end do
4875 tmp1 = tmp1 * r1
4876 end do
4877 ! Update output if beta is non-zero
4878 else
4879 !r1 = src_r / dst_r
4880 tmp1 = alpha * r1
4881 do j = 0, p
4882 indj = j*j + j + 1
4883 do k = indj-j, indj+j
4884 dst_m(k) = beta*dst_m(k) + tmp1*src_m(k)
4885 end do
4886 tmp1 = tmp1 * r1
4887 end do
4888 end if
4889 end if
4891
4915subroutine fmm_m2m_bessel_derivative_ztranslate_work(src_sk, p, vscales, &
4916 & alpha, src_m, beta, dst_m)
4917 use complex_bessel
4918 ! Inputs
4919 integer, intent(in) :: p
4920 real(dp), intent(in) :: src_sk(p+2), vscales((p+2)*(p+2)), &
4921 & alpha, src_m((p+1)*(p+1)), beta
4922 ! Output
4923 real(dp), intent(inout) :: dst_m((p+2)*(p+2))
4924 ! Temporary workspace
4925 ! Local variables
4926 real(dp) :: tmp1, tmp2, tmp3, res1, res2
4927 integer :: j, k, n, indj, indn, l
4928 ! Pointers for temporary values of powers
4929 real(dp) :: fact(2*p+1), fact2(p+2)
4930 ! In case alpha is zero just do a proper scaling of output
4931 if (alpha .eq. zero) then
4932 if (beta .eq. zero) then
4933 dst_m = zero
4934 else
4935 dst_m = beta * dst_m
4936 end if
4937 return
4938 end if
4939 ! Get factorials
4940 fact(1) = one
4941 do j = 1, 2*p
4942 fact(j+1) = dble(j) * fact(j)
4943 end do
4944 fact2(1) = one
4945 do j = 1, p+1
4946 fact2(j+1) = dble(2*j+1) * fact2(j)
4947 end do
4948 ! Now alpha is non-zero
4949 ! Do actual M2M
4950 ! Overwrite output if beta is zero
4951 if (beta .eq. zero) then
4952 dst_m = zero
4953 ! j=0, k=0, n=1, l=0
4954 j = 0
4955 k = 0
4956 n = 1
4957 l = 0
4958 indj = 1
4959 indn = 3
4960 dst_m(1) = vscales(3) / src_sk(2) / vscales(1) * src_sk(1) / fact2(2) * &
4961 & src_m(3) * alpha
4962 ! j=1..p-1
4963 do j = 1, p-1
4964 ! Offset for dst_m
4965 indj = j*j + j + 1
4966 ! k = 0
4967 k = 0
4968 res1 = zero
4969 ! n=j-1
4970 n = j-1
4971 ! Offset for src_m
4972 indn = n*n + n + 1
4973 tmp1 = vscales(indn) * &
4974 & dble(2*j+1) * fact(j+1) * fact(n+1) / &
4975 & src_sk(n+1) / vscales(indj) * src_sk(j+1) * alpha
4976 tmp3 = zero
4977 ! l=n
4978 l = n
4979 tmp2 = (two**(-l)) / &
4980 & fact(l+1)*fact(2*l+1)/ &
4981 & fact(l+1)/fact(l+1)/ &
4982 & fact2(j+1)
4983 tmp3 = tmp3 + tmp2
4984 tmp3 = tmp3 * tmp1
4985 res1 = res1 + tmp3*src_m(indn)
4986 ! n=j+1
4987 n = j+1
4988 ! Offset for src_m
4989 indn = n*n + n + 1
4990 tmp1 = vscales(indn) * &
4991 & dble(2*j+1) * fact(j+1) * fact(n+1) / &
4992 & src_sk(n+1) / vscales(indj) * src_sk(j+1) * alpha
4993 tmp3 = zero
4994 ! l=j
4995 l = j
4996 tmp2 = (two**(-l)) / &
4997 & fact(l+1)*fact(2*l+1)/ &
4998 & fact(l+1)/fact(l+1)/ &
4999 & fact2(j+2)
5000 tmp3 = tmp3 + tmp2
5001 tmp3 = tmp3 * tmp1
5002 res1 = res1 + tmp3*src_m(indn)
5003 dst_m(indj) = res1
5004 ! k=1..j-1
5005 do k = 1, j-1
5006 res1 = zero
5007 res2 = zero
5008 ! n=j-1
5009 n = j-1
5010 ! Offset for src_m
5011 indn = n*n + n + 1
5012 tmp1 = vscales(indn+k) * &
5013 & dble(2*j+1) * fact(j-k+1) * fact(n+k+1) / &
5014 & src_sk(n+1) / vscales(indj+k) * src_sk(j+1) * alpha
5015 tmp3 = zero
5016 l = n
5017 tmp2 = (two**(-l)) / &
5018 & fact(l+k+1)*fact(2*l+1)/ &
5019 & fact(l+1)/fact(l-k+1)/ &
5020 & fact2(j+1)
5021 tmp3 = tmp3 + tmp2
5022 tmp3 = tmp3 * tmp1
5023 res1 = res1 + tmp3*src_m(indn+k)
5024 res2 = res2 + tmp3*src_m(indn-k)
5025 ! n=j+1
5026 n = j+1
5027 ! Offset for src_m
5028 indn = n*n + n + 1
5029 tmp1 = vscales(indn+k) * &
5030 & dble(2*j+1) * fact(j-k+1) * fact(n+k+1) / &
5031 & src_sk(n+1) / vscales(indj+k) * src_sk(j+1) * alpha
5032 tmp3 = zero
5033 l = j
5034 tmp2 = (two**(-l)) / &
5035 & fact(l+k+1)*fact(2*l+1)/ &
5036 & fact(l+1)/fact(l-k+1)/ &
5037 & fact2(j+2)
5038 tmp3 = tmp3 + tmp2
5039 tmp3 = tmp3 * tmp1
5040 res1 = res1 + tmp3*src_m(indn+k)
5041 res2 = res2 + tmp3*src_m(indn-k)
5042 dst_m(indj+k) = res1
5043 dst_m(indj-k) = res2
5044 end do
5045 ! k=j
5046 k = j
5047 res1 = zero
5048 res2 = zero
5049 ! n=j+1
5050 n = j+1
5051 ! Offset for src_m
5052 indn = n*n + n + 1
5053 tmp1 = vscales(indn+k) * &
5054 & dble(2*j+1) * fact(n+k+1) / &
5055 & src_sk(n+1) / vscales(indj+k) * src_sk(j+1) * alpha
5056 tmp3 = zero
5057 l = j
5058 tmp2 = (two**(-l)) / &
5059 & fact(l+k+1)*fact(2*l+1)/ &
5060 & fact(l+1)/ &
5061 & fact2(j+2)
5062 tmp3 = tmp3 + tmp2
5063 tmp3 = tmp3 * tmp1
5064 res1 = res1 + tmp3*src_m(indn+k)
5065 res2 = res2 + tmp3*src_m(indn-k)
5066 dst_m(indj+k) = res1
5067 dst_m(indj-k) = res2
5068 end do
5069 ! j=p, n=p-1, l=p-1
5070 j = p
5071 n = p-1
5072 ! Offset for dst_m
5073 indj = j*j + j + 1
5074 ! k = 0
5075 k = 0
5076 res1 = zero
5077 ! Offset for src_m
5078 indn = n*n + n + 1
5079 tmp1 = vscales(indn) * &
5080 & dble(2*j+1) * fact(j+1) * fact(n+1) / &
5081 & src_sk(n+1) / vscales(indj) * src_sk(j+1) * alpha
5082 tmp3 = zero
5083 l = p-1
5084 tmp2 = (two**(-l)) / &
5085 & fact(l+1)*fact(2*l+1)/ &
5086 & fact(l+1)/fact(l+1)/ &
5087 & fact2(j+1)
5088 tmp3 = tmp3 + tmp2
5089 tmp3 = tmp3 * tmp1
5090 res1 = res1 + tmp3*src_m(indn)
5091 dst_m(indj) = res1
5092 ! k=1..p-1
5093 do k = 1, p-1
5094 res1 = zero
5095 res2 = zero
5096 ! n=j-1
5097 n = j-1
5098 ! Offset for src_m
5099 indn = n*n + n + 1
5100 tmp1 = vscales(indn+k) * &
5101 & dble(2*j+1) * fact(j-k+1) * fact(n+k+1) / &
5102 & src_sk(n+1) / vscales(indj+k) * src_sk(j+1) * alpha
5103 tmp3 = zero
5104 l = n
5105 tmp2 = (two**(-l)) / &
5106 & fact(l+k+1)*fact(2*l+1)/ &
5107 & fact(l+1)/fact(l-k+1)/fact(j-l+1)/fact(n-l+1)/ &
5108 & fact2(j+1)
5109 tmp3 = tmp3 + tmp2
5110 tmp3 = tmp3 * tmp1
5111 res1 = res1 + tmp3*src_m(indn+k)
5112 res2 = res2 + tmp3*src_m(indn-k)
5113 dst_m(indj+k) = res1
5114 dst_m(indj-k) = res2
5115 end do
5116 ! j=p,k=p is impossible because n=p-1 or n=p+1 is impossible
5117 ! j=p+1, n=p, l=p
5118 j = p+1
5119 n = p
5120 ! Offset for dst_m
5121 indj = j*j + j + 1
5122 ! k = 0
5123 k = 0
5124 res1 = zero
5125 ! Offset for src_m
5126 indn = n*n + n + 1
5127 tmp1 = vscales(indn) * &
5128 & dble(2*j+1) * fact(j+1) * fact(n+1) / &
5129 & src_sk(n+1) / vscales(indj) * src_sk(j+1) * alpha
5130 tmp3 = zero
5131 l = n
5132 tmp2 = (two**(-l)) / &
5133 & fact(l+1)*fact(2*l+1)/ &
5134 & fact(l+1)/fact(l+1)/fact(j-l+1)/fact(n-l+1)/ &
5135 & fact2(j+1)
5136 tmp3 = tmp3 + tmp2
5137 tmp3 = tmp3 * tmp1
5138 res1 = res1 + tmp3*src_m(indn)
5139 dst_m(indj) = res1
5140 ! k != 0
5141 do k = 1, p
5142 res1 = zero
5143 res2 = zero
5144 ! n=j-1
5145 n = p
5146 ! Offset for src_m
5147 indn = n*n + n + 1
5148 tmp1 = vscales(indn+k) * &
5149 & dble(2*j+1) * fact(j-k+1) * fact(n+k+1) / &
5150 & src_sk(n+1) / vscales(indj+k) * src_sk(j+1) * alpha
5151 tmp3 = zero
5152 l = n
5153 tmp2 = (two**(-l)) / &
5154 & fact(l+k+1)*fact(2*l+1)/ &
5155 & fact(l+1)/fact(l-k+1)/fact(j-l+1)/fact(n-l+1)/ &
5156 & fact2(j+1)
5157 tmp3 = tmp3 + tmp2
5158 tmp3 = tmp3 * tmp1
5159 res1 = res1 + tmp3*src_m(indn+k)
5160 res2 = res2 + tmp3*src_m(indn-k)
5161 dst_m(indj+k) = res1
5162 dst_m(indj-k) = res2
5163 end do
5164 ! Update output if beta is non-zero
5165 else
5166 ! j=0, k=0, n=1, l=0
5167 j = 0
5168 k = 0
5169 n = 1
5170 l = 0
5171 indj = 1
5172 indn = 3
5173 dst_m(1) = vscales(3) / src_sk(2) / vscales(1) * src_sk(1) / fact2(2) * &
5174 & src_m(3) * alpha + dst_m(1)*beta
5175 ! j=1..p-1
5176 do j = 1, p-1
5177 ! Offset for dst_m
5178 indj = j*j + j + 1
5179 ! k = 0
5180 k = 0
5181 res1 = zero
5182 ! n=j-1
5183 n = j-1
5184 ! Offset for src_m
5185 indn = n*n + n + 1
5186 tmp1 = vscales(indn) * &
5187 & dble(2*j+1) * fact(j+1) * fact(n+1) / &
5188 & src_sk(n+1) / vscales(indj) * src_sk(j+1) * alpha
5189 tmp3 = zero
5190 ! l=n
5191 l = n
5192 tmp2 = (two**(-l)) / &
5193 & fact(l+1)*fact(2*l+1)/ &
5194 & fact(l+1)/fact(l+1)/ &
5195 & fact2(j+1)
5196 tmp3 = tmp3 + tmp2
5197 tmp3 = tmp3 * tmp1
5198 res1 = res1 + tmp3*src_m(indn)
5199 ! n=j+1
5200 n = j+1
5201 ! Offset for src_m
5202 indn = n*n + n + 1
5203 tmp1 = vscales(indn) * &
5204 & dble(2*j+1) * fact(j+1) * fact(n+1) / &
5205 & src_sk(n+1) / vscales(indj) * src_sk(j+1) * alpha
5206 tmp3 = zero
5207 ! l=j
5208 l = j
5209 tmp2 = (two**(-l)) / &
5210 & fact(l+1)*fact(2*l+1)/ &
5211 & fact(l+1)/fact(l+1)/ &
5212 & fact2(j+2)
5213 tmp3 = tmp3 + tmp2
5214 tmp3 = tmp3 * tmp1
5215 res1 = res1 + tmp3*src_m(indn)
5216 dst_m(indj) = beta*dst_m(indj) + res1
5217 ! k=1..j-1
5218 do k = 1, j-1
5219 res1 = zero
5220 res2 = zero
5221 ! n=j-1
5222 n = j-1
5223 ! Offset for src_m
5224 indn = n*n + n + 1
5225 tmp1 = vscales(indn+k) * &
5226 & dble(2*j+1) * fact(j-k+1) * fact(n+k+1) / &
5227 & src_sk(n+1) / vscales(indj+k) * src_sk(j+1) * alpha
5228 tmp3 = zero
5229 l = n
5230 tmp2 = (two**(-l)) / &
5231 & fact(l+k+1)*fact(2*l+1)/ &
5232 & fact(l+1)/fact(l-k+1)/ &
5233 & fact2(j+1)
5234 tmp3 = tmp3 + tmp2
5235 tmp3 = tmp3 * tmp1
5236 res1 = res1 + tmp3*src_m(indn+k)
5237 res2 = res2 + tmp3*src_m(indn-k)
5238 ! n=j+1
5239 n = j+1
5240 ! Offset for src_m
5241 indn = n*n + n + 1
5242 tmp1 = vscales(indn+k) * &
5243 & dble(2*j+1) * fact(j-k+1) * fact(n+k+1) / &
5244 & src_sk(n+1) / vscales(indj+k) * src_sk(j+1) * alpha
5245 tmp3 = zero
5246 l = j
5247 tmp2 = (two**(-l)) / &
5248 & fact(l+k+1)*fact(2*l+1)/ &
5249 & fact(l+1)/fact(l-k+1)/ &
5250 & fact2(j+2)
5251 tmp3 = tmp3 + tmp2
5252 tmp3 = tmp3 * tmp1
5253 res1 = res1 + tmp3*src_m(indn+k)
5254 res2 = res2 + tmp3*src_m(indn-k)
5255 dst_m(indj+k) = beta*dst_m(indj+k) + res1
5256 dst_m(indj-k) = beta*dst_m(indj-k) + res2
5257 end do
5258 ! k=j
5259 k = j
5260 res1 = zero
5261 res2 = zero
5262 ! n=j+1
5263 n = j+1
5264 ! Offset for src_m
5265 indn = n*n + n + 1
5266 tmp1 = vscales(indn+k) * &
5267 & dble(2*j+1) * fact(n+k+1) / &
5268 & src_sk(n+1) / vscales(indj+k) * src_sk(j+1) * alpha
5269 tmp3 = zero
5270 l = j
5271 tmp2 = (two**(-l)) / &
5272 & fact(l+k+1)*fact(2*l+1)/ &
5273 & fact(l+1)/ &
5274 & fact2(j+2)
5275 tmp3 = tmp3 + tmp2
5276 tmp3 = tmp3 * tmp1
5277 res1 = res1 + tmp3*src_m(indn+k)
5278 res2 = res2 + tmp3*src_m(indn-k)
5279 dst_m(indj+k) = beta*dst_m(indj+k) + res1
5280 dst_m(indj-k) = beta*dst_m(indj-k) + res2
5281 end do
5282 ! j=p, n=p-1, l=p-1
5283 j = p
5284 n = p-1
5285 ! Offset for dst_m
5286 indj = j*j + j + 1
5287 ! k = 0
5288 k = 0
5289 res1 = zero
5290 ! Offset for src_m
5291 indn = n*n + n + 1
5292 tmp1 = vscales(indn) * &
5293 & dble(2*j+1) * fact(j+1) * fact(n+1) / &
5294 & src_sk(n+1) / vscales(indj) * src_sk(j+1) * alpha
5295 tmp3 = zero
5296 l = p-1
5297 tmp2 = (two**(-l)) / &
5298 & fact(l+1)*fact(2*l+1)/ &
5299 & fact(l+1)/fact(l+1)/ &
5300 & fact2(j+1)
5301 tmp3 = tmp3 + tmp2
5302 tmp3 = tmp3 * tmp1
5303 res1 = res1 + tmp3*src_m(indn)
5304 dst_m(indj) = beta*dst_m(indj) + res1
5305 ! k=1..p-1
5306 do k = 1, p-1
5307 res1 = zero
5308 res2 = zero
5309 ! n=j-1
5310 n = j-1
5311 ! Offset for src_m
5312 indn = n*n + n + 1
5313 tmp1 = vscales(indn+k) * &
5314 & dble(2*j+1) * fact(j-k+1) * fact(n+k+1) / &
5315 & src_sk(n+1) / vscales(indj+k) * src_sk(j+1) * alpha
5316 tmp3 = zero
5317 l = n
5318 tmp2 = (two**(-l)) / &
5319 & fact(l+k+1)*fact(2*l+1)/ &
5320 & fact(l+1)/fact(l-k+1)/fact(j-l+1)/fact(n-l+1)/ &
5321 & fact2(j+1)
5322 tmp3 = tmp3 + tmp2
5323 tmp3 = tmp3 * tmp1
5324 res1 = res1 + tmp3*src_m(indn+k)
5325 res2 = res2 + tmp3*src_m(indn-k)
5326 dst_m(indj+k) = beta*dst_m(indj+k) + res1
5327 dst_m(indj-k) = beta*dst_m(indj-k) + res2
5328 end do
5329 ! j=p,k=p is impossible because n=p-1 or n=p+1 is impossible
5330 ! j=p+1, n=p, l=p
5331 j = p+1
5332 n = p
5333 ! Offset for dst_m
5334 indj = j*j + j + 1
5335 ! k = 0
5336 k = 0
5337 res1 = zero
5338 ! Offset for src_m
5339 indn = n*n + n + 1
5340 tmp1 = vscales(indn) * &
5341 & dble(2*j+1) * fact(j+1) * fact(n+1) / &
5342 & src_sk(n+1) / vscales(indj) * src_sk(j+1) * alpha
5343 tmp3 = zero
5344 l = n
5345 tmp2 = (two**(-l)) / &
5346 & fact(l+1)*fact(2*l+1)/ &
5347 & fact(l+1)/fact(l+1)/fact(j-l+1)/fact(n-l+1)/ &
5348 & fact2(j+1)
5349 tmp3 = tmp3 + tmp2
5350 tmp3 = tmp3 * tmp1
5351 res1 = res1 + tmp3*src_m(indn)
5352 dst_m(indj) = beta*dst_m(indj) + res1
5353 ! k != 0
5354 do k = 1, p
5355 res1 = zero
5356 res2 = zero
5357 ! n=j-1
5358 n = p
5359 ! Offset for src_m
5360 indn = n*n + n + 1
5361 tmp1 = vscales(indn+k) * &
5362 & dble(2*j+1) * fact(j-k+1) * fact(n+k+1) / &
5363 & src_sk(n+1) / vscales(indj+k) * src_sk(j+1) * alpha
5364 tmp3 = zero
5365 l = n
5366 tmp2 = (two**(-l)) / &
5367 & fact(l+k+1)*fact(2*l+1)/ &
5368 & fact(l+1)/fact(l-k+1)/fact(j-l+1)/fact(n-l+1)/ &
5369 & fact2(j+1)
5370 tmp3 = tmp3 + tmp2
5371 tmp3 = tmp3 * tmp1
5372 res1 = res1 + tmp3*src_m(indn+k)
5373 res2 = res2 + tmp3*src_m(indn-k)
5374 dst_m(indj+k) = beta*dst_m(indj+k) + res1
5375 dst_m(indj-k) = beta*dst_m(indj-k) + res2
5376 end do
5377 end if
5379
5402subroutine fmm_m2m_ztranslate_adj(z, src_r, dst_r, p, vscales, vcnk, &
5403 & alpha, src_m, beta, dst_m)
5404 ! Inputs
5405 integer, intent(in) :: p
5406 real(dp), intent(in) :: z, src_r, dst_r, vscales((p+1)*(p+1)), &
5407 & vcnk((2*p+1)*(p+1)), alpha, src_m((p+1)*(p+1)), beta
5408 ! Output
5409 real(dp), intent(inout) :: dst_m((p+1)*(p+1))
5410 ! Temporary workspace
5411 real(dp) :: work(2*(p+1))
5412 ! Call corresponding work routine
5413 call fmm_m2m_ztranslate_adj_work(z, src_r, dst_r, p, vscales, vcnk, &
5414 & alpha, src_m, beta, dst_m, work)
5415end subroutine fmm_m2m_ztranslate_adj
5416
5440subroutine fmm_m2m_ztranslate_adj_work(z, src_r, dst_r, p, vscales, vcnk, &
5441 & alpha, src_m, beta, dst_m, work)
5442 ! Inputs
5443 integer, intent(in) :: p
5444 real(dp), intent(in) :: z, src_r, dst_r, vscales((p+1)*(p+1)), &
5445 & vcnk((2*p+1)*(p+1)), alpha, src_m((p+1)*(p+1)), beta
5446 ! Output
5447 real(dp), intent(inout) :: dst_m((p+1)*(p+1))
5448 ! Temporary workspace
5449 real(dp), intent(out), target :: work(2*(p+1))
5450 ! Local variables
5451 real(dp) :: r1, r2, tmp1, tmp2, res1, res2
5452 integer :: j, k, n, indj, indn, indjk1, indjk2
5453 ! Pointers for temporary values of powers
5454 real(dp), pointer :: pow_r1(:), pow_r2(:)
5455 ! In case alpha is zero just do a proper scaling of output
5456 if (alpha .eq. zero) then
5457 if (beta .eq. zero) then
5458 dst_m = zero
5459 else
5460 dst_m = beta * dst_m
5461 end if
5462 return
5463 end if
5464 ! If harmonics have different centers
5465 if (z .ne. 0) then
5466 ! Prepare pointers
5467 pow_r1(1:p+1) => work(1:p+1)
5468 pow_r2(1:p+1) => work(p+2:2*(p+1))
5469 ! Get powers of r1 and r2
5470 r1 = dst_r / src_r
5471 r2 = -z / src_r
5472 pow_r1(1) = r1
5473 pow_r2(1) = one
5474 do j = 2, p+1
5475 pow_r1(j) = pow_r1(j-1) * r1
5476 pow_r2(j) = pow_r2(j-1) * r2
5477 end do
5478 ! Do actual adjoint M2M
5479 ! Overwrite output if beta is zero
5480 if (beta .eq. zero) then
5481 do n = 0, p
5482 ! Offset for dst_m
5483 indn = n*n + n + 1
5484 tmp1 = alpha * pow_r1(n+1) / vscales(indn)
5485 ! k = 0
5486 res1 = zero
5487 do j = n, p
5488 ! Offset for src_m
5489 indj = j*j + j + 1
5490 ! Offsets for vcnk
5491 indjk1 = j*(j+1)/2 + 1
5492 tmp2 = tmp1 * vscales(indj) * pow_r2(j-n+1) * &
5493 & vcnk(indjk1+j-n)**2
5494 res1 = res1 + tmp2*src_m(indj)
5495 end do
5496 dst_m(indn) = res1
5497 ! k != 1
5498 do k = 1, n
5499 res1 = zero
5500 res2 = zero
5501 do j = n, p
5502 ! Offset for src_m
5503 indj = j*j + j + 1
5504 ! Offsets for vcnk
5505 indjk1 = (j-k)*(j-k+1)/2 + 1
5506 indjk2 = (j+k)*(j+k+1)/2 + 1
5507 tmp2 = tmp1 * vscales(indj) * pow_r2(j-n+1) * &
5508 & vcnk(indjk1+j-n) * vcnk(indjk2+j-n)
5509 res1 = res1 + tmp2*src_m(indj+k)
5510 res2 = res2 + tmp2*src_m(indj-k)
5511 end do
5512 dst_m(indn+k) = res1
5513 dst_m(indn-k) = res2
5514 end do
5515 end do
5516 ! Update output if beta is non-zero
5517 else
5518 do n = 0, p
5519 ! Offset for dst_m
5520 indn = n*n + n + 1
5521 tmp1 = alpha * pow_r1(n+1) / vscales(indn)
5522 ! k = 0
5523 res1 = zero
5524 do j = n, p
5525 ! Offset for src_m
5526 indj = j*j + j + 1
5527 ! Offsets for vcnk
5528 indjk1 = j*(j+1)/2 + 1
5529 tmp2 = tmp1 * vscales(indj) * pow_r2(j-n+1) * &
5530 & vcnk(indjk1+j-n)**2
5531 res1 = res1 + tmp2*src_m(indj)
5532 end do
5533 dst_m(indn) = beta*dst_m(indn) + res1
5534 ! k != 1
5535 do k = 1, n
5536 res1 = zero
5537 res2 = zero
5538 do j = n, p
5539 ! Offset for src_m
5540 indj = j*j + j + 1
5541 ! Offsets for vcnk
5542 indjk1 = (j-k)*(j-k+1)/2 + 1
5543 indjk2 = (j+k)*(j+k+1)/2 + 1
5544 tmp2 = tmp1 * vscales(indj) * pow_r2(j-n+1) * &
5545 & vcnk(indjk1+j-n) * vcnk(indjk2+j-n)
5546 res1 = res1 + tmp2*src_m(indj+k)
5547 res2 = res2 + tmp2*src_m(indj-k)
5548 end do
5549 dst_m(indn+k) = beta*dst_m(indn+k) + res1
5550 dst_m(indn-k) = beta*dst_m(indn-k) + res2
5551 end do
5552 end do
5553 end if
5554 ! If harmonics are located at the same point
5555 else
5556 ! Overwrite output if beta is zero
5557 if (beta .eq. zero) then
5558 r1 = dst_r / src_r
5559 tmp1 = alpha * r1
5560 do j = 0, p
5561 indj = j*j + j + 1
5562 res1 = zero
5563 do k = indj-j, indj+j
5564 dst_m(k) = tmp1 * src_m(k)
5565 end do
5566 tmp1 = tmp1 * r1
5567 end do
5568 ! Update output if beta is non-zero
5569 else
5570 r1 = dst_r / src_r
5571 tmp1 = alpha * r1
5572 do j = 0, p
5573 indj = j*j + j + 1
5574 res1 = zero
5575 do k = indj-j, indj+j
5576 dst_m(k) = beta*dst_m(k) + tmp1*src_m(k)
5577 end do
5578 tmp1 = tmp1 * r1
5579 end do
5580 end if
5581 end if
5582end subroutine fmm_m2m_ztranslate_adj_work
5583
5603subroutine fmm_m2m_scale(src_r, dst_r, p, alpha, src_m, beta, dst_m)
5604 ! Inputs
5605 integer, intent(in) :: p
5606 real(dp), intent(in) :: src_r, dst_r, alpha, src_m((p+1)*(p+1)), beta
5607 ! Output
5608 real(dp), intent(inout) :: dst_m((p+1)*(p+1))
5609 ! Local variables
5610 real(dp) :: r1, tmp1
5611 integer :: j, k, indj
5612 ! Init output
5613 if (beta .eq. zero) then
5614 dst_m = zero
5615 else
5616 dst_m = beta * dst_m
5617 end if
5618 r1 = src_r / dst_r
5619 tmp1 = alpha * r1
5620 do j = 0, p
5621 indj = j*j + j + 1
5622 do k = indj-j, indj+j
5623 dst_m(k) = dst_m(k) + src_m(k)*tmp1
5624 end do
5625 tmp1 = tmp1 * r1
5626 end do
5627end subroutine fmm_m2m_scale
5628
5649subroutine fmm_m2m_scale_adj(src_r, dst_r, p, alpha, src_m, beta, dst_m)
5650 ! Inputs
5651 integer, intent(in) :: p
5652 real(dp), intent(in) :: src_r, dst_r, alpha, src_m((p+1)*(p+1)), beta
5653 ! Output
5654 real(dp), intent(inout) :: dst_m((p+1)*(p+1))
5655 ! Local variables
5656 real(dp) :: r1, tmp1
5657 integer :: j, k, indj
5658 ! Init output
5659 if (beta .eq. zero) then
5660 dst_m = zero
5661 else
5662 dst_m = beta * dst_m
5663 end if
5664 r1 = dst_r / src_r
5665 tmp1 = alpha * r1
5666 do j = 0, p
5667 indj = j*j + j + 1
5668 do k = indj-j, indj+j
5669 dst_m(k) = dst_m(k) + src_m(k)*tmp1
5670 end do
5671 tmp1 = tmp1 * r1
5672 end do
5673end subroutine fmm_m2m_scale_adj
5674
5700subroutine fmm_m2m_rotation(c, src_r, dst_r, p, vscales, vcnk, alpha, &
5701 & src_m, beta, dst_m)
5702 ! Inputs
5703 integer, intent(in) :: p
5704 real(dp), intent(in) :: c(3), src_r, dst_r, vscales((p+1)*(p+1)), &
5705 & vcnk((2*p+1)*(p+1)), alpha, src_m((p+1)*(p+1)), beta
5706 ! Output
5707 real(dp), intent(inout) :: dst_m((p+1)*(p+1))
5708 ! Temporary workspace
5709 real(dp) :: work(6*p*p + 19*p + 8)
5710 ! Call corresponding work routine
5711 call fmm_m2m_rotation_work(c, src_r, dst_r, p, vscales, vcnk, alpha, &
5712 & src_m, beta, dst_m, work)
5713end subroutine fmm_m2m_rotation
5714
5741subroutine fmm_m2m_rotation_work(c, src_r, dst_r, p, vscales, vcnk, alpha, &
5742 & src_m, beta, dst_m, work)
5743 ! Inputs
5744 integer, intent(in) :: p
5745 real(dp), intent(in) :: c(3), src_r, dst_r, vscales((p+1)*(p+1)), &
5746 & vcnk((2*p+1)*(p+1)), alpha, src_m((p+1)*(p+1)), beta
5747 ! Output
5748 real(dp), intent(inout) :: dst_m((p+1)*(p+1))
5749 ! Temporary workspace
5750 real(dp), intent(out), target :: work(6*p*p + 19*p + 8)
5751 ! Local variables
5752 real(dp) :: rho, ctheta, stheta, cphi, sphi
5753 integer :: m, n
5754 ! Pointers for temporary values of harmonics
5755 real(dp), pointer :: tmp_m(:), tmp_m2(:), vcos(:), vsin(:)
5756 ! Convert Cartesian coordinates into spherical
5757 call carttosph(c, rho, ctheta, stheta, cphi, sphi)
5758 ! If no need for rotations, just do translation along z
5759 if (stheta .eq. zero) then
5760 ! Workspace here is 2*(p+1)
5761 call fmm_m2m_ztranslate_work(c(3), src_r, dst_r, p, vscales, vcnk, &
5762 & alpha, src_m, beta, dst_m, work)
5763 return
5764 end if
5765 ! Prepare pointers
5766 m = (p+1)**2
5767 n = 4*m + 5*p ! 4*p*p + 13*p + 4
5768 tmp_m(1:m) => work(n+1:n+m) ! 5*p*p + 15*p + 5
5769 n = n + m
5770 tmp_m2(1:m) => work(n+1:n+m) ! 6*p*p + 17*p + 6
5771 n = n + m
5772 m = p + 1
5773 vcos => work(n+1:n+m) ! 6*p*p + 18*p + 7
5774 n = n + m
5775 vsin => work(n+1:n+m) ! 6*p*p + 19*p + 8
5776 ! Compute arrays of cos and sin that are needed for rotations of harmonics
5777 call trgev(cphi, sphi, p, vcos, vsin)
5778 ! Rotate around OZ axis (work array might appear in the future)
5779 call fmm_sph_rotate_oz_adj_work(p, vcos, vsin, alpha, src_m, zero, tmp_m)
5780 ! Perform rotation in the OXZ plane, work size is 4*p*p+13*p+4
5781 call fmm_sph_rotate_oxz_work(p, ctheta, -stheta, one, tmp_m, zero, &
5782 & tmp_m2, work)
5783 ! OZ translation, workspace here is 2*(p+1)
5784 call fmm_m2m_ztranslate_work(rho, src_r, dst_r, p, vscales, vcnk, one, &
5785 & tmp_m2, zero, tmp_m, work)
5786 ! Backward rotation in the OXZ plane, work size is 4*p*p+13*p+4
5787 call fmm_sph_rotate_oxz_work(p, ctheta, stheta, one, tmp_m, zero, tmp_m2, &
5788 & work)
5789 ! Backward rotation around OZ axis (work array might appear in the future)
5790 call fmm_sph_rotate_oz_work(p, vcos, vsin, one, tmp_m2, beta, dst_m)
5791end subroutine fmm_m2m_rotation_work
5792
5817subroutine fmm_m2m_bessel_rotation(c, src_r, dst_r, kappa, p, vscales, alpha, &
5818 & src_m, beta, dst_m)
5819 use complex_bessel
5820 ! Inputs
5821 integer, intent(in) :: p
5822 real(dp), intent(in) :: c(3), src_r, dst_r, vscales((p+1)*(p+1)), &
5823 & alpha, src_m((p+1)*(p+1)), beta, kappa
5824 ! Output
5825 real(dp), intent(inout) :: dst_m((p+1)*(p+1))
5826 ! Temporary workspace
5827 real(dp) :: work(6*p*p + 19*p + 8), src_sk(p+1), dst_sk(p+1), s1, s2, ck(3)
5828 complex(dp) :: work_complex(2*p+1), z1, z2
5829 integer :: NZ, ierr
5830 ! Compute Bessel functions
5831 z1 = src_r * kappa
5832 z2 = dst_r * kappa
5833 s1 = sqrt(2/(pi*real(z1)))
5834 s2 = sqrt(2/(pi*real(z2)))
5835 call cbesk(z1, pt5, 1, 1, work_complex(1), nz, ierr)
5836 src_sk(1) = s1 * real(work_complex(1))
5837 call cbesk(z2, pt5, 1, 1, work_complex(1), nz, ierr)
5838 dst_sk(1) = s2 * real(work_complex(1))
5839 if (p .gt. 0) then
5840 call cbesk(z1, 1.5d0, 1, p, work_complex(2:p+1), nz, ierr)
5841 src_sk(2:p+1) = s1 * real(work_complex(2:p+1))
5842 call cbesk(z2, 1.5d0, 1, p, work_complex(2:p+1), nz, ierr)
5843 dst_sk(2:p+1) = s2 * real(work_complex(2:p+1))
5844 end if
5845 ! Call corresponding work routine
5846 ck = c*kappa
5847 call fmm_m2m_bessel_rotation_work(ck, src_sk, dst_sk, p, vscales, alpha, &
5848 & src_m, beta, dst_m, work, work_complex)
5849end subroutine fmm_m2m_bessel_rotation
5850
5876subroutine fmm_m2m_bessel_rotation_work(c, src_sk, dst_sk, p, vscales, alpha, &
5877 & src_m, beta, dst_m, work, work_complex)
5878 ! Inputs
5879 integer, intent(in) :: p
5880 real(dp), intent(in) :: c(3), src_sk(p+1), dst_sk(p+1), vscales((p+1)*(p+1)), &
5881 & alpha, src_m((p+1)*(p+1)), beta
5882 ! Output
5883 real(dp), intent(inout) :: dst_m((p+1)*(p+1))
5884 ! Temporary workspace
5885 real(dp), intent(out), target :: work(6*p*p + 19*p + 8)
5886 complex(dp), intent(out) :: work_complex(2*p+1)
5887 ! Local variables
5888 real(dp) :: rho, ctheta, stheta, cphi, sphi
5889 integer :: m, n
5890 ! Pointers for temporary values of harmonics
5891 real(dp), pointer :: tmp_m(:), tmp_m2(:), vcos(:), vsin(:)
5892 ! Convert Cartesian coordinates into spherical
5893 call carttosph(c, rho, ctheta, stheta, cphi, sphi)
5894 ! If no need for rotations, just do translation along z
5895 !if (stheta .eq. zero) then
5896 ! ! Workspace here is 2*(p+1)
5897 ! call fmm_m2m_bessel_ztranslate_work(c(3), src_r, dst_r, p, vscales, &
5898 ! & alpha, src_m, beta, dst_m, work)
5899 ! return
5900 !end if
5901 ! Prepare pointers
5902 m = (p+1)**2
5903 n = 4*m + 5*p ! 4*p*p + 13*p + 4
5904 tmp_m(1:m) => work(n+1:n+m) ! 5*p*p + 15*p + 5
5905 n = n + m
5906 tmp_m2(1:m) => work(n+1:n+m) ! 6*p*p + 17*p + 6
5907 n = n + m
5908 m = p + 1
5909 vcos => work(n+1:n+m) ! 6*p*p + 18*p + 7
5910 n = n + m
5911 vsin => work(n+1:n+m) ! 6*p*p + 19*p + 8
5912 ! Compute arrays of cos and sin that are needed for rotations of harmonics
5913 call trgev(cphi, sphi, p, vcos, vsin)
5914 ! Rotate around OZ axis (work array might appear in the future)
5915 call fmm_sph_rotate_oz_adj_work(p, vcos, vsin, alpha, src_m, zero, tmp_m)
5916 ! Perform rotation in the OXZ plane, work size is 4*p*p+13*p+4
5917 call fmm_sph_rotate_oxz_work(p, ctheta, -stheta, one, tmp_m, zero, &
5918 & tmp_m2, work)
5919 ! OZ translation, workspace here is 2*(p+1)
5920 call fmm_m2m_bessel_ztranslate_work(rho, src_sk, dst_sk, p, vscales, one, &
5921 & tmp_m2, zero, tmp_m, work, work_complex)
5922 ! Backward rotation in the OXZ plane, work size is 4*p*p+13*p+4
5923 call fmm_sph_rotate_oxz_work(p, ctheta, stheta, one, tmp_m, zero, tmp_m2, &
5924 & work)
5925 ! Backward rotation around OZ axis (work array might appear in the future)
5926 call fmm_sph_rotate_oz_work(p, vcos, vsin, one, tmp_m2, beta, dst_m)
5927end subroutine fmm_m2m_bessel_rotation_work
5928
5953subroutine fmm_m2m_bessel_rotation_adj(c, src_r, dst_r, kappa, p, vscales, &
5954 & alpha, src_m, beta, dst_m)
5955 use complex_bessel
5956 ! Inputs
5957 integer, intent(in) :: p
5958 real(dp), intent(in) :: c(3), src_r, dst_r, vscales((p+1)*(p+1)), &
5959 & alpha, src_m((p+1)*(p+1)), beta, kappa
5960 ! Output
5961 real(dp), intent(inout) :: dst_m((p+1)*(p+1))
5962 ! Temporary workspace
5963 real(dp) :: work(6*p*p + 19*p + 8), src_sk(p+1), dst_sk(p+1), s1, s2, ck(3)
5964 complex(dp) :: work_complex(2*p+1), z1, z2
5965 integer :: NZ, ierr
5966 ! Compute Bessel functions
5967 z1 = src_r * kappa
5968 z2 = dst_r * kappa
5969 s1 = sqrt(2/(pi*real(z1)))
5970 s2 = sqrt(2/(pi*real(z2)))
5971 call cbesk(z1, pt5, 1, 1, work_complex(1), nz, ierr)
5972 src_sk(1) = s1 * real(work_complex(1))
5973 call cbesk(z2, pt5, 1, 1, work_complex(1), nz, ierr)
5974 dst_sk(1) = s2 * real(work_complex(1))
5975 if (p .gt. 0) then
5976 call cbesk(z1, 1.5d0, 1, p, work_complex(2:p+1), nz, ierr)
5977 src_sk(2:p+1) = s1 * real(work_complex(2:p+1))
5978 call cbesk(z2, 1.5d0, 1, p, work_complex(2:p+1), nz, ierr)
5979 dst_sk(2:p+1) = s2 * real(work_complex(2:p+1))
5980 end if
5981 ! Call corresponding work routine
5982 ck = -c*kappa
5983 call fmm_m2m_bessel_rotation_adj_work(ck, dst_sk, src_sk, p, vscales, &
5984 & alpha, src_m, beta, dst_m, work, work_complex)
5985end subroutine fmm_m2m_bessel_rotation_adj
5986
6012subroutine fmm_m2m_bessel_rotation_adj_work(c, src_sk, dst_sk, p, vscales, alpha, &
6013 & src_m, beta, dst_m, work, work_complex)
6014 ! Inputs
6015 integer, intent(in) :: p
6016 real(dp), intent(in) :: c(3), src_sk(p+1), dst_sk(p+1), vscales((p+1)*(p+1)), &
6017 & alpha, src_m((p+1)*(p+1)), beta
6018 ! Output
6019 real(dp), intent(inout) :: dst_m((p+1)*(p+1))
6020 ! Temporary workspace
6021 real(dp), intent(out), target :: work(6*p*p + 19*p + 8)
6022 complex(dp), intent(out) :: work_complex(2*p+1)
6023 ! Local variables
6024 real(dp) :: rho, ctheta, stheta, cphi, sphi
6025 integer :: m, n
6026 ! Pointers for temporary values of harmonics
6027 real(dp), pointer :: tmp_m(:), tmp_m2(:), vcos(:), vsin(:)
6028 ! Convert Cartesian coordinates into spherical
6029 call carttosph(c, rho, ctheta, stheta, cphi, sphi)
6030 ! If no need for rotations, just do translation along z
6031 !if (stheta .eq. zero) then
6032 ! ! Workspace here is 2*(p+1)
6033 ! call fmm_m2m_bessel_ztranslate_work(c(3), src_r, dst_r, p, vscales, &
6034 ! & alpha, src_m, beta, dst_m, work)
6035 ! return
6036 !end if
6037 ! Prepare pointers
6038 m = (p+1)**2
6039 n = 4*m + 5*p ! 4*p*p + 13*p + 4
6040 tmp_m(1:m) => work(n+1:n+m) ! 5*p*p + 15*p + 5
6041 n = n + m
6042 tmp_m2(1:m) => work(n+1:n+m) ! 6*p*p + 17*p + 6
6043 n = n + m
6044 m = p + 1
6045 vcos => work(n+1:n+m) ! 6*p*p + 18*p + 7
6046 n = n + m
6047 vsin => work(n+1:n+m) ! 6*p*p + 19*p + 8
6048 ! Compute arrays of cos and sin that are needed for rotations of harmonics
6049 call trgev(cphi, sphi, p, vcos, vsin)
6050 ! Rotate around OZ axis (work array might appear in the future)
6051 call fmm_sph_rotate_oz_adj_work(p, vcos, vsin, alpha, src_m, zero, tmp_m)
6052 ! Perform rotation in the OXZ plane, work size is 4*p*p+13*p+4
6053 call fmm_sph_rotate_oxz_work(p, ctheta, -stheta, one, tmp_m, zero, &
6054 & tmp_m2, work)
6055 ! OZ translation, workspace here is 2*(p+1)
6056 call fmm_m2m_bessel_ztranslate_adj_work(rho, src_sk, dst_sk, p, vscales, one, &
6057 & tmp_m2, zero, tmp_m, work, work_complex)
6058 ! Backward rotation in the OXZ plane, work size is 4*p*p+13*p+4
6059 call fmm_sph_rotate_oxz_work(p, ctheta, stheta, one, tmp_m, zero, tmp_m2, &
6060 & work)
6061 ! Backward rotation around OZ axis (work array might appear in the future)
6062 call fmm_sph_rotate_oz_work(p, vcos, vsin, one, tmp_m2, beta, dst_m)
6064
6088subroutine fmm_m2m_rotation_adj(c, src_r, dst_r, p, vscales, vcnk, alpha, &
6089 & src_m, beta, dst_m)
6090 ! Inputs
6091 integer, intent(in) :: p
6092 real(dp), intent(in) :: c(3), src_r, dst_r, vscales((p+1)*(p+1)), &
6093 & vcnk((2*p+1)*(p+1)), alpha, src_m((p+1)*(p+1)), beta
6094 ! Output
6095 real(dp), intent(inout) :: dst_m((p+1)*(p+1))
6096 ! Temporary workspace
6097 real(dp) :: work(6*p*p + 19*p + 8)
6098 ! Call corresponding work routine
6099 call fmm_m2m_rotation_adj_work(c, src_r, dst_r, p, vscales, vcnk, alpha, &
6100 & src_m, beta, dst_m, work)
6101end subroutine fmm_m2m_rotation_adj
6102
6127subroutine fmm_m2m_rotation_adj_work(c, src_r, dst_r, p, vscales, vcnk, &
6128 & alpha, src_m, beta, dst_m, work)
6129 ! Inputs
6130 integer, intent(in) :: p
6131 real(dp), intent(in) :: c(3), src_r, dst_r, vscales((p+1)*(p+1)), &
6132 & vcnk((2*p+1)*(p+1)), alpha, src_m((p+1)*(p+1)), beta
6133 ! Output
6134 real(dp), intent(inout) :: dst_m((p+1)*(p+1))
6135 ! Temporary workspace
6136 real(dp), intent(out), target :: work(6*p*p + 19*p + 8)
6137 ! Local variables
6138 real(dp) :: rho, ctheta, stheta, cphi, sphi
6139 integer :: m, n
6140 ! Pointers for temporary values of harmonics
6141 real(dp), pointer :: tmp_m(:), tmp_m2(:), vcos(:), vsin(:)
6142 ! Covert Cartesian coordinates into spherical
6143 call carttosph(c, rho, ctheta, stheta, cphi, sphi)
6144 ! If no need for rotations, just do translation along z
6145 if (stheta .eq. 0) then
6146 ! Workspace here is 2*(p+1)
6147 call fmm_m2m_ztranslate_adj_work(c(3), src_r, dst_r, p, vscales, &
6148 & vcnk, alpha, src_m, beta, dst_m, work)
6149 return
6150 end if
6151 ! Prepare pointers
6152 m = (p+1)**2
6153 n = 4*m + 5*p ! 4*p*p + 13*p + 4
6154 tmp_m(1:m) => work(n+1:n+m) ! 5*p*p + 15*p + 5
6155 n = n + m
6156 tmp_m2(1:m) => work(n+1:n+m) ! 6*p*p + 17*p + 6
6157 n = n + m
6158 m = p + 1
6159 vcos => work(n+1:n+m) ! 6*p*p + 18*p + 7
6160 n = n + m
6161 vsin => work(n+1:n+m) ! 6*p*p + 19*p + 8
6162 ! Compute arrays of cos and sin that are needed for rotations of harmonics
6163 call trgev(cphi, sphi, p, vcos, vsin)
6164 ! Rotate around OZ axis (work array might appear in the future)
6165 call fmm_sph_rotate_oz_adj_work(p, vcos, vsin, alpha, src_m, zero, tmp_m)
6166 ! Perform rotation in the OXZ plane, work size is 4*p*p+13*p+4
6167 call fmm_sph_rotate_oxz_work(p, ctheta, -stheta, one, tmp_m, zero, &
6168 & tmp_m2, work)
6169 ! OZ translation, workspace here is 2*(p+1)
6170 call fmm_m2m_ztranslate_adj_work(rho, src_r, dst_r, p, vscales, vcnk, &
6171 & one, tmp_m2, zero, tmp_m, work)
6172 ! Backward rotation in the OXZ plane, work size is 4*p*p+13*p+4
6173 call fmm_sph_rotate_oxz_work(p, ctheta, stheta, one, tmp_m, zero, tmp_m2, &
6174 & work)
6175 ! Backward rotation around OZ axis (work array might appear in the future)
6176 call fmm_sph_rotate_oz_work(p, vcos, vsin, one, tmp_m2, beta, dst_m)
6177end subroutine fmm_m2m_rotation_adj_work
6178
6179subroutine fmm_m2m_bessel_grad(p, sph_sk, vscales, sph_m, sph_m_grad)
6180 !! Inputs
6181 integer, intent(in) :: p
6182 real(dp), intent(in) :: sph_sk(p+2), vscales((p+2)**2), &
6183 & sph_m((p+1)**2)
6184 !! Output
6185 real(dp), intent(out) :: sph_m_grad((p+2)**2, 3)
6186 !! Local variables
6187 real(dp), dimension(3, 3) :: zx_coord_transform, zy_coord_transform
6188 real(dp) :: tmp((p+1)**2), tmp2((p+2)**2)
6189 ! Set coordinate transformations
6190 zx_coord_transform = zero
6191 zx_coord_transform(3, 2) = one
6192 zx_coord_transform(2, 3) = one
6193 zx_coord_transform(1, 1) = one
6194 zy_coord_transform = zero
6195 zy_coord_transform(1, 2) = one
6196 zy_coord_transform(2, 1) = one
6197 zy_coord_transform(3, 3) = one
6198 ! Transform input harmonics for OX axis
6199 call fmm_sph_transform(p, zx_coord_transform, one, sph_m, zero, tmp)
6200 ! Differentiate M2M along OZ
6201 call fmm_m2m_bessel_derivative_ztranslate_work(sph_sk, p, vscales, &
6202 & one, tmp, zero, tmp2)
6203 ! Transform into output harmonics for OX axis
6204 call fmm_sph_transform(p+1, zx_coord_transform, one, tmp2, zero, &
6205 & sph_m_grad(:, 1))
6206 ! Transform input harmonics for OY axis
6207 call fmm_sph_transform(p, zy_coord_transform, one, sph_m, zero, tmp)
6208 ! Differentiate M2M along OZ
6209 call fmm_m2m_bessel_derivative_ztranslate_work(sph_sk, p, vscales, &
6210 & one, tmp, zero, tmp2)
6211 ! Transform into output harmonics for OY axis
6212 call fmm_sph_transform(p+1, zy_coord_transform, one, tmp2, zero, &
6213 & sph_m_grad(:, 2))
6214 ! Differentiate for OZ axis
6215 call fmm_m2m_bessel_derivative_ztranslate_work(sph_sk, p, vscales, &
6216 & one, sph_m, zero, sph_m_grad(:, 3))
6217end subroutine fmm_m2m_bessel_grad
6218
6241subroutine fmm_l2l_ztranslate(z, src_r, dst_r, p, vscales, vfact, alpha, &
6242 & src_l, beta, dst_l)
6243 ! Inputs
6244 integer, intent(in) :: p
6245 real(dp), intent(in) :: z, src_r, dst_r, vscales((p+1)*(p+1)), &
6246 & vfact(2*p+1), alpha, src_l((p+1)*(p+1)), beta
6247 ! Output
6248 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
6249 ! Temporary workspace
6250 real(dp) :: work(2*(p+1))
6251 ! Call corresponding work routine
6252 call fmm_l2l_ztranslate_work(z, src_r, dst_r, p, vscales, vfact, alpha, &
6253 & src_l, beta, dst_l, work)
6254end subroutine fmm_l2l_ztranslate
6255
6279subroutine fmm_l2l_ztranslate_work(z, src_r, dst_r, p, vscales, vfact, alpha, &
6280 & src_l, beta, dst_l, work)
6281 ! Inputs
6282 integer, intent(in) :: p
6283 real(dp), intent(in) :: z, src_r, dst_r, vscales((p+1)*(p+1)), &
6284 & vfact(2*p+1), alpha, src_l((p+1)*(p+1)), beta
6285 ! Output
6286 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
6287 ! Temporary workspace
6288 real(dp), intent(out), target :: work(2*(p+1))
6289 ! Local variables
6290 real(dp) :: r1, r2, tmp1, tmp2
6291 integer :: j, k, n, indj, indn
6292 ! Pointers for temporary values of powers
6293 real(dp), pointer :: pow_r1(:), pow_r2(:)
6294 ! Init output
6295 if (beta .eq. zero) then
6296 dst_l = zero
6297 else
6298 dst_l = beta * dst_l
6299 end if
6300 ! If harmonics have different centers
6301 if (z .ne. 0) then
6302 ! Prepare pointers
6303 pow_r1(1:p+1) => work(1:p+1)
6304 pow_r2(1:p+1) => work(p+2:2*(p+1))
6305 ! Get powers of r1 and r2
6306 r1 = z / src_r
6307 r2 = dst_r / z
6308 pow_r1(1) = 1
6309 pow_r2(1) = 1
6310 do j = 2, p+1
6311 pow_r1(j) = pow_r1(j-1) * r1
6312 pow_r2(j) = pow_r2(j-1) * r2
6313 end do
6314 ! Do actual L2L
6315 do j = 0, p
6316 indj = j*j + j + 1
6317 do k = 0, j
6318 tmp1 = alpha * pow_r2(j+1) / vfact(j-k+1) / vfact(j+k+1) * &
6319 & vscales(indj)
6320 do n = j, p
6321 indn = n*n + n + 1
6322 tmp2 = tmp1 * pow_r1(n+1) / vscales(indn) * &
6323 & vfact(n-k+1) * vfact(n+k+1) / vfact(n-j+1) / &
6324 & vfact(n-j+1)
6325 if (mod(n+j, 2) .eq. 1) then
6326 tmp2 = -tmp2
6327 end if
6328 if (k .eq. 0) then
6329 dst_l(indj) = dst_l(indj) + tmp2*src_l(indn)
6330 else
6331 dst_l(indj+k) = dst_l(indj+k) + tmp2*src_l(indn+k)
6332 dst_l(indj-k) = dst_l(indj-k) + tmp2*src_l(indn-k)
6333 end if
6334 end do
6335 end do
6336 end do
6337 ! If harmonics are located at the same point
6338 else
6339 r1 = dst_r / src_r
6340 tmp1 = alpha
6341 do j = 0, p
6342 indj = j*j + j + 1
6343 do k = indj-j, indj+j
6344 dst_l(k) = dst_l(k) + src_l(k)*tmp1
6345 end do
6346 tmp1 = tmp1 * r1
6347 end do
6348 end if
6349end subroutine fmm_l2l_ztranslate_work
6350
6372subroutine fmm_l2l_bessel_ztranslate(z, src_si, dst_si, p, vscales, alpha, &
6373 & src_l, beta, dst_l)
6374 ! Inputs
6375 integer, intent(in) :: p
6376 real(dp), intent(in) :: z, src_si(p+1), dst_si(p+1), vscales((p+1)*(p+1)), &
6377 & alpha, src_l((p+1)*(p+1)), beta
6378 ! Output
6379 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
6380 ! Temporary workspace
6381 real(dp) :: work(2*p+1)
6382 complex(dp) :: work_complex(2*p+1)
6383 ! Call corresponding work routine
6384 call fmm_l2l_bessel_ztranslate_work(z, src_si, dst_si, p, vscales, &
6385 & alpha, src_l, beta, dst_l, work, work_complex)
6386end subroutine fmm_l2l_bessel_ztranslate
6387
6411subroutine fmm_l2l_bessel_ztranslate_work(z, src_si, dst_si, p, vscales, &
6412 & alpha, src_l, beta, dst_l, work, work_complex)
6413 use complex_bessel
6414 ! Inputs
6415 integer, intent(in) :: p
6416 real(dp), intent(in) :: z, src_si(p+1), dst_si(p+1), vscales((p+1)*(p+1)), &
6417 & alpha, src_l((p+1)*(p+1)), beta
6418 ! Output
6419 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
6420 ! Temporary workspace
6421 real(dp), intent(out), target :: work(2*p+1)
6422 complex(dp), intent(out) :: work_complex(2*p+1)
6423 ! Local variables
6424 real(dp) :: r1, tmp1, tmp2, tmp3, res1, res2
6425 integer :: j, k, n, indj, indn, l, NZ, ierr
6426 ! Pointers for temporary values of powers
6427 real(dp) :: fact(2*p+1)
6428 complex(dp) :: complex_argument
6429 ! In case alpha is zero just do a proper scaling of output
6430 if (alpha .eq. zero) then
6431 if (beta .eq. zero) then
6432 dst_l = zero
6433 else
6434 dst_l = beta * dst_l
6435 end if
6436 return
6437 end if
6438 ! Get factorials
6439 fact(1) = one
6440 do j = 1, 2*p
6441 fact(j+1) = dble(j) * fact(j)
6442 end do
6443 ! Now alpha is non-zero
6444 ! If harmonics have different centers
6445 if (z .ne. 0) then
6446 complex_argument = z
6447 call cbesi(complex_argument, pt5, 1, 1, work_complex(1), nz, ierr)
6448 work(1) = sqrt(pi/(2*z)) * real(work_complex(1))
6449 if (p .gt. 0) then
6450 call cbesi(complex_argument, 1.5d0, 1, 2*p, &
6451 & work_complex(2:2*p+1), nz, ierr)
6452 work(2:2*p+1) = sqrt(pi/(2*z)) * real(work_complex(2:2*p+1))
6453 end if
6454 ! Do actual M2M
6455 ! Overwrite output if beta is zero
6456 if (beta .eq. zero) then
6457 dst_l = zero
6458 do j = 0, p
6459 ! Offset for dst_l
6460 indj = j*j + j + 1
6461 ! k = 0
6462 k = 0
6463 res1 = zero
6464 do n = 0, p
6465 ! Offset for src_l
6466 indn = n*n + n + 1
6467 tmp1 = vscales(indn) * ((-one)**(j+n)) * &
6468 & dble(2*j+1) * fact(j+1) * fact(n+1) / &
6469 & src_si(n+1) / vscales(indj) * dst_si(j+1) * alpha
6470 tmp3 = zero
6471 do l = 0, min(j, n)
6472 tmp2 = (two**(-l)) / &
6473 & fact(l+1)*fact(2*l+1)/ &
6474 & fact(l+1)/fact(l+1)/fact(j-l+1)/fact(n-l+1)* &
6475 & (work(j+n-l+1) / (z**l))
6476 tmp3 = tmp3 + tmp2
6477 end do
6478 tmp3 = tmp3 * tmp1
6479 res1 = res1 + tmp3*src_l(indn)
6480 end do
6481 dst_l(indj) = res1
6482 ! k != 0
6483 do k = 1, j
6484 res1 = zero
6485 res2 = zero
6486 do n = k, p
6487 ! Offset for src_l
6488 indn = n*n + n + 1
6489 tmp1 = vscales(indn+k) * ((-one)**(j+n)) * &
6490 & dble(2*j+1) * fact(j-k+1) * fact(n+k+1) / &
6491 & src_si(n+1) / vscales(indj+k) * dst_si(j+1) * alpha
6492 tmp3 = zero
6493 do l = k, min(j, n)
6494 tmp2 = (two**(-l)) / &
6495 & fact(l+k+1)*fact(2*l+1)/ &
6496 & fact(l+1)/fact(l-k+1)/fact(j-l+1)/fact(n-l+1)* &
6497 & (work(j+n-l+1) / (z**l))
6498 tmp3 = tmp3 + tmp2
6499 end do
6500 tmp3 = tmp3 * tmp1
6501 res1 = res1 + tmp3*src_l(indn+k)
6502 res2 = res2 + tmp3*src_l(indn-k)
6503 end do
6504 dst_l(indj+k) = res1
6505 dst_l(indj-k) = res2
6506 end do
6507 end do
6508 ! Update output if beta is non-zero
6509 else
6510 do j = 0, p
6511 ! Offset for dst_l
6512 indj = j*j + j + 1
6513 ! k = 0
6514 k = 0
6515 res1 = zero
6516 do n = 0, p
6517 ! Offset for src_l
6518 indn = n*n + n + 1
6519 tmp1 = vscales(indn) * ((-one)**(j+n)) * &
6520 & dble(2*j+1) * fact(j+1) * fact(n+1) / &
6521 & src_si(n+1) / vscales(indj) * dst_si(j+1) * alpha
6522 tmp3 = zero
6523 do l = 0, min(j, n)
6524 tmp2 = (two**(-l)) / &
6525 & fact(l+1)*fact(2*l+1)/ &
6526 & fact(l+1)/fact(l+1)/fact(j-l+1)/fact(n-l+1)* &
6527 & (work(j+n-l+1) / (z**l))
6528 tmp3 = tmp3 + tmp2
6529 end do
6530 tmp3 = tmp3 * tmp1
6531 res1 = res1 + tmp3*src_l(indn)
6532 end do
6533 dst_l(indj) = beta*dst_l(indj) + res1
6534 ! k != 0
6535 do k = 1, j
6536 res1 = zero
6537 res2 = zero
6538 do n = k, p
6539 ! Offset for src_l
6540 indn = n*n + n + 1
6541 tmp1 = vscales(indn+k) * ((-one)**(j+n)) * &
6542 & dble(2*j+1) * fact(j-k+1) * fact(n+k+1) / &
6543 & src_si(n+1) / vscales(indj+k) * dst_si(j+1) * alpha
6544 tmp3 = zero
6545 do l = k, min(j, n)
6546 tmp2 = (two**(-l)) / &
6547 & fact(l+k+1)*fact(2*l+1)/ &
6548 & fact(l+1)/fact(l-k+1)/fact(j-l+1)/fact(n-l+1)* &
6549 & (work(j+n-l+1) / (z**l))
6550 tmp3 = tmp3 + tmp2
6551 end do
6552 tmp3 = tmp3 * tmp1
6553 res1 = res1 + tmp3*src_l(indn+k)
6554 res2 = res2 + tmp3*src_l(indn-k)
6555 end do
6556 dst_l(indj+k) = beta*dst_l(indj+k) + res1
6557 dst_l(indj-k) = beta*dst_l(indj-k) + res2
6558 end do
6559 end do
6560 end if
6561 ! If harmonics are located at the same point
6562 else
6563 r1 = zero
6564 ! Overwrite output if beta is zero
6565 if (beta .eq. zero) then
6566 !r1 = src_r / dst_r
6567 tmp1 = alpha * r1
6568 do j = 0, p
6569 indj = j*j + j + 1
6570 do k = indj-j, indj+j
6571 dst_l(k) = tmp1 * src_l(k)
6572 end do
6573 tmp1 = tmp1 * r1
6574 end do
6575 ! Update output if beta is non-zero
6576 else
6577 !r1 = src_r / dst_r
6578 tmp1 = alpha * r1
6579 do j = 0, p
6580 indj = j*j + j + 1
6581 do k = indj-j, indj+j
6582 dst_l(k) = beta*dst_l(k) + tmp1*src_l(k)
6583 end do
6584 tmp1 = tmp1 * r1
6585 end do
6586 end if
6587 end if
6588end subroutine fmm_l2l_bessel_ztranslate_work
6589
6611subroutine fmm_l2l_bessel_ztranslate_adj(z, src_si, dst_si, p, vscales, alpha, &
6612 & src_l, beta, dst_l)
6613 ! Inputs
6614 integer, intent(in) :: p
6615 real(dp), intent(in) :: z, src_si(p+1), dst_si(p+1), vscales((p+1)*(p+1)), &
6616 & alpha, src_l((p+1)*(p+1)), beta
6617 ! Output
6618 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
6619 ! Temporary workspace
6620 real(dp) :: work(2*p+1)
6621 complex(dp) :: work_complex(2*p+1)
6622 ! Call corresponding work routine
6623 call fmm_l2l_bessel_ztranslate_adj_work(z, src_si, dst_si, p, vscales, &
6624 & alpha, src_l, beta, dst_l, work, work_complex)
6625end subroutine fmm_l2l_bessel_ztranslate_adj
6626
6650subroutine fmm_l2l_bessel_ztranslate_adj_work(z, src_si, dst_si, p, vscales, &
6651 & alpha, src_l, beta, dst_l, work, work_complex)
6652 use complex_bessel
6653 ! Inputs
6654 integer, intent(in) :: p
6655 real(dp), intent(in) :: z, src_si(p+1), dst_si(p+1), vscales((p+1)*(p+1)), &
6656 & alpha, src_l((p+1)*(p+1)), beta
6657 ! Output
6658 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
6659 ! Temporary workspace
6660 real(dp), intent(out), target :: work(2*p+1)
6661 complex(dp), intent(out) :: work_complex(2*p+1)
6662 ! Local variables
6663 real(dp) :: r1, tmp1, tmp2, tmp3, res1, res2
6664 integer :: j, k, n, indj, indn, l, NZ, ierr
6665 ! Pointers for temporary values of powers
6666 real(dp) :: fact(2*p+1)
6667 complex(dp) :: complex_argument
6668 ! In case alpha is zero just do a proper scaling of output
6669 if (alpha .eq. zero) then
6670 if (beta .eq. zero) then
6671 dst_l = zero
6672 else
6673 dst_l = beta * dst_l
6674 end if
6675 return
6676 end if
6677 ! Get factorials
6678 fact(1) = one
6679 do j = 1, 2*p
6680 fact(j+1) = dble(j) * fact(j)
6681 end do
6682 ! Now alpha is non-zero
6683 ! If harmonics have different centers
6684 if (z .ne. 0) then
6685 complex_argument = z
6686 call cbesi(complex_argument, pt5, 1, 1, work_complex(1), nz, ierr)
6687 work(1) = sqrt(pi/(2*z)) * real(work_complex(1))
6688 if (p .gt. 0) then
6689 call cbesi(complex_argument, 1.5d0, 1, 2*p, &
6690 & work_complex(2:2*p+1), nz, ierr)
6691 work(2:2*p+1) = sqrt(pi/(2*z)) * real(work_complex(2:2*p+1))
6692 end if
6693 ! Do actual M2M
6694 ! Overwrite output if beta is zero
6695 if (beta .eq. zero) then
6696 dst_l = zero
6697 do n = 0, p
6698 ! Offset for dst_l
6699 indn = n*n + n + 1
6700 ! k = 0
6701 k = 0
6702 res1 = zero
6703 do j = 0, p
6704 ! Offset for src_l
6705 indj = j*j + j + 1
6706 tmp1 = vscales(indn) * ((-one)**(j+n)) * &
6707 & dble(2*j+1) * fact(j+1) * fact(n+1) / &
6708 & src_si(n+1) / vscales(indj) * dst_si(j+1) * alpha
6709 tmp3 = zero
6710 do l = 0, min(j, n)
6711 tmp2 = (two**(-l)) / &
6712 & fact(l+1)*fact(2*l+1)/ &
6713 & fact(l+1)/fact(l+1)/fact(j-l+1)/fact(n-l+1)* &
6714 & (work(j+n-l+1) / (z**l))
6715 tmp3 = tmp3 + tmp2
6716 end do
6717 tmp3 = tmp3 * tmp1
6718 res1 = res1 + tmp3*src_l(indj)
6719 end do
6720 dst_l(indn) = res1
6721 ! k != 0
6722 do k = 1, n
6723 res1 = zero
6724 res2 = zero
6725 do j = k, p
6726 ! Offset for src_l
6727 indj = j*j + j + 1
6728 tmp1 = vscales(indn+k) * ((-one)**(j+n)) * &
6729 & dble(2*j+1) * fact(j-k+1) * fact(n+k+1) / &
6730 & src_si(n+1) / vscales(indj+k) * dst_si(j+1) * alpha
6731 tmp3 = zero
6732 do l = k, min(j, n)
6733 tmp2 = (two**(-l)) / &
6734 & fact(l+k+1)*fact(2*l+1)/ &
6735 & fact(l+1)/fact(l-k+1)/fact(j-l+1)/fact(n-l+1)* &
6736 & (work(j+n-l+1) / (z**l))
6737 tmp3 = tmp3 + tmp2
6738 end do
6739 tmp3 = tmp3 * tmp1
6740 res1 = res1 + tmp3*src_l(indj+k)
6741 res2 = res2 + tmp3*src_l(indj-k)
6742 end do
6743 dst_l(indn+k) = res1
6744 dst_l(indn-k) = res2
6745 end do
6746 end do
6747 ! Update output if beta is non-zero
6748 else
6749 do n = 0, p
6750 ! Offset for dst_l
6751 indn = n*n + n + 1
6752 ! k = 0
6753 k = 0
6754 res1 = zero
6755 do j = 0, p
6756 ! Offset for src_l
6757 indj = j*j + j + 1
6758 tmp1 = vscales(indn) * ((-one)**(j+n)) * &
6759 & dble(2*j+1) * fact(j+1) * fact(n+1) / &
6760 !& src_si(n+1) / vscales(indj) * dst_si(j+1) * alpha
6761 & src_si(n+1) / vscales(indj) * dst_si(j+1) * alpha
6762 tmp3 = zero
6763 do l = 0, min(j, n)
6764 tmp2 = (two**(-l)) / &
6765 & fact(l+1)*fact(2*l+1)/ &
6766 & fact(l+1)/fact(l+1)/fact(j-l+1)/fact(n-l+1)* &
6767 & (work(j+n-l+1) / (z**l))
6768 tmp3 = tmp3 + tmp2
6769 end do
6770 tmp3 = tmp3 * tmp1
6771 res1 = res1 + tmp3*src_l(indj)
6772 end do
6773 dst_l(indn) = beta*dst_l(indn) + res1
6774 ! k != 0
6775 do k = 1, n
6776 res1 = zero
6777 res2 = zero
6778 do j = k, p
6779 ! Offset for src_l
6780 indj = j*j + j + 1
6781 tmp1 = vscales(indn+k) * ((-one)**(j+n)) * &
6782 & dble(2*j+1) * fact(j-k+1) * fact(n+k+1) / &
6783 & src_si(n+1) / vscales(indj+k) * dst_si(j+1) * alpha
6784 tmp3 = zero
6785 do l = k, min(j, n)
6786 tmp2 = (two**(-l)) / &
6787 & fact(l+k+1)*fact(2*l+1)/ &
6788 & fact(l+1)/fact(l-k+1)/fact(j-l+1)/fact(n-l+1)* &
6789 & (work(j+n-l+1) / (z**l))
6790 tmp3 = tmp3 + tmp2
6791 end do
6792 tmp3 = tmp3 * tmp1
6793 res1 = res1 + tmp3*src_l(indj+k)
6794 res2 = res2 + tmp3*src_l(indj-k)
6795 end do
6796 dst_l(indn+k) = beta*dst_l(indn+k) + res1
6797 dst_l(indn-k) = beta*dst_l(indn-k) + res2
6798 end do
6799 end do
6800 end if
6801 ! If harmonics are located at the same point
6802 else
6803 r1 = zero
6804 ! Overwrite output if beta is zero
6805 if (beta .eq. zero) then
6806 !r1 = src_r / dst_r
6807 tmp1 = alpha * r1
6808 do j = 0, p
6809 indj = j*j + j + 1
6810 do k = indj-j, indj+j
6811 dst_l(k) = tmp1 * src_l(k)
6812 end do
6813 tmp1 = tmp1 * r1
6814 end do
6815 ! Update output if beta is non-zero
6816 else
6817 !r1 = src_r / dst_r
6818 tmp1 = alpha * r1
6819 do j = 0, p
6820 indj = j*j + j + 1
6821 do k = indj-j, indj+j
6822 dst_l(k) = beta*dst_l(k) + tmp1*src_l(k)
6823 end do
6824 tmp1 = tmp1 * r1
6825 end do
6826 end if
6827 end if
6829
6853subroutine fmm_l2l_bessel_derivative_ztranslate_work(src_si, p, vscales, &
6854 & alpha, src_l, beta, dst_l)
6855 use complex_bessel
6856 ! Inputs
6857 integer, intent(in) :: p
6858 real(dp), intent(in) :: src_si(p+2), vscales((p+2)*(p+2)), &
6859 & alpha, src_l((p+1)*(p+1)), beta
6860 ! Output
6861 real(dp), intent(inout) :: dst_l((p+2)*(p+2))
6862 ! Derivative of the L2L is just a negative derivative of the M2M
6863 call fmm_m2m_bessel_derivative_ztranslate_work(src_si, p, vscales, &
6864 & -alpha, src_l, beta, dst_l)
6866
6889subroutine fmm_l2l_ztranslate_adj(z, src_r, dst_r, p, vscales, vfact, &
6890 & alpha, src_l, beta, dst_l)
6891 ! Inputs
6892 integer, intent(in) :: p
6893 real(dp), intent(in) :: z, src_r, dst_r, vscales((p+1)*(p+1)), &
6894 & vfact(2*p+1), alpha, src_l((p+1)*(p+1)), beta
6895 ! Output
6896 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
6897 ! Temporary workspace
6898 real(dp) :: work(2*(p+1))
6899 ! Call corresponding work routine
6900 call fmm_l2l_ztranslate_adj_work(z, src_r, dst_r, p, vscales, vfact, &
6901 & alpha, src_l, beta, dst_l, work)
6902end subroutine fmm_l2l_ztranslate_adj
6903
6927subroutine fmm_l2l_ztranslate_adj_work(z, src_r, dst_r, p, vscales, vfact, &
6928 & alpha, src_l, beta, dst_l, work)
6929 ! Inputs
6930 integer, intent(in) :: p
6931 real(dp), intent(in) :: z, src_r, dst_r, vscales((p+1)*(p+1)), &
6932 & vfact(2*p+1), alpha, src_l((p+1)*(p+1)), beta
6933 ! Output
6934 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
6935 ! Temporary workspace
6936 real(dp), intent(out), target :: work(2*(p+1))
6937 ! Local variables
6938 real(dp) :: r1, r2, tmp1, tmp2
6939 integer :: j, k, n, indj, indn
6940 ! Pointers for temporary values of powers
6941 real(dp), pointer :: pow_r1(:), pow_r2(:)
6942 ! Init output
6943 if (beta .eq. zero) then
6944 dst_l = zero
6945 else
6946 dst_l = beta * dst_l
6947 end if
6948 ! If harmonics have different centers
6949 if (z .ne. 0) then
6950 ! Prepare pointers
6951 pow_r1(1:p+1) => work(1:p+1)
6952 pow_r2(1:p+1) => work(p+2:2*(p+1))
6953 ! Get powers of r1 and r2
6954 r1 = -z / dst_r
6955 r2 = -src_r / z
6956 pow_r1(1) = one
6957 pow_r2(1) = one
6958 do j = 2, p+1
6959 pow_r1(j) = pow_r1(j-1) * r1
6960 pow_r2(j) = pow_r2(j-1) * r2
6961 end do
6962 do j = 0, p
6963 indj = j*j + j + 1
6964 do k = 0, j
6965 tmp1 = alpha * pow_r2(j+1) / vfact(j-k+1) / vfact(j+k+1) * &
6966 & vscales(indj)
6967 do n = j, p
6968 indn = n*n + n + 1
6969 tmp2 = tmp1 * pow_r1(n+1) / vscales(indn) * &
6970 & vfact(n-k+1) * vfact(n+k+1) / vfact(n-j+1) / &
6971 & vfact(n-j+1)
6972 if (mod(n+j, 2) .eq. 1) then
6973 tmp2 = -tmp2
6974 end if
6975 if (k .eq. 0) then
6976 dst_l(indn) = dst_l(indn) + tmp2*src_l(indj)
6977 else
6978 dst_l(indn+k) = dst_l(indn+k) + tmp2*src_l(indj+k)
6979 dst_l(indn-k) = dst_l(indn-k) + tmp2*src_l(indj-k)
6980 end if
6981 end do
6982 end do
6983 end do
6984 ! If harmonics are located at the same point
6985 else
6986 r1 = src_r / dst_r
6987 tmp1 = alpha
6988 do j = 0, p
6989 indj = j*j + j + 1
6990 do k = indj-j, indj+j
6991 dst_l(k) = dst_l(k) + src_l(k)*tmp1
6992 end do
6993 tmp1 = tmp1 * r1
6994 end do
6995 end if
6996end subroutine fmm_l2l_ztranslate_adj_work
6997
7017subroutine fmm_l2l_scale(src_r, dst_r, p, alpha, src_l, beta, dst_l)
7018 ! Inputs
7019 integer, intent(in) :: p
7020 real(dp), intent(in) :: src_r, dst_r, alpha, src_l((p+1)*(p+1)), beta
7021 ! Output
7022 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
7023 ! Local variables
7024 real(dp) :: r1, tmp1
7025 integer :: j, k, indj
7026 ! Init output
7027 if (beta .eq. zero) then
7028 dst_l = zero
7029 else
7030 dst_l = beta * dst_l
7031 end if
7032 r1 = dst_r / src_r
7033 tmp1 = alpha
7034 do j = 0, p
7035 indj = j*j + j + 1
7036 do k = indj-j, indj+j
7037 dst_l(k) = dst_l(k) + src_l(k)*tmp1
7038 end do
7039 tmp1 = tmp1 * r1
7040 end do
7041end subroutine fmm_l2l_scale
7042
7062subroutine fmm_l2l_scale_adj(src_r, dst_r, p, alpha, src_l, beta, dst_l)
7063 ! Inputs
7064 integer, intent(in) :: p
7065 real(dp), intent(in) :: src_r, dst_r, alpha, src_l((p+1)*(p+1)), beta
7066 ! Output
7067 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
7068 ! Local variables
7069 real(dp) :: r1, tmp1
7070 integer :: j, k, indj
7071 ! Init output
7072 if (beta .eq. zero) then
7073 dst_l = zero
7074 else
7075 dst_l = beta * dst_l
7076 end if
7077 r1 = src_r / dst_r
7078 tmp1 = alpha
7079 do j = 0, p
7080 indj = j*j + j + 1
7081 do k = indj-j, indj+j
7082 dst_l(k) = dst_l(k) + src_l(k)*tmp1
7083 end do
7084 tmp1 = tmp1 * r1
7085 end do
7086end subroutine fmm_l2l_scale_adj
7087
7113subroutine fmm_l2l_rotation(c, src_r, dst_r, p, vscales, vfact, alpha, &
7114 & src_l, beta, dst_l)
7115 ! Inputs
7116 integer, intent(in) :: p
7117 real(dp), intent(in) :: c(3), src_r, dst_r, vscales((p+1)*(p+1)), &
7118 & vfact(2*p+1), alpha, src_l((p+1)*(p+1)), beta
7119 ! Output
7120 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
7121 ! Temporary workspace
7122 real(dp) :: work(6*p*p+19*p+8)
7123 ! Call corresponding work routine
7124 call fmm_l2l_rotation_work(c, src_r, dst_r, p, vscales, vfact, alpha, &
7125 & src_l, beta, dst_l, work)
7126end subroutine fmm_l2l_rotation
7127
7154subroutine fmm_l2l_rotation_work(c, src_r, dst_r, p, vscales, vfact, alpha, &
7155 & src_l, beta, dst_l, work)
7156 ! Inputs
7157 integer, intent(in) :: p
7158 real(dp), intent(in) :: c(3), src_r, dst_r, vscales((p+1)*(p+1)), &
7159 & vfact(2*p+1), alpha, src_l((p+1)*(p+1)), beta
7160 ! Output
7161 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
7162 ! Temporary workspace
7163 real(dp), intent(out), target :: work(6*p*p + 19*p + 8)
7164 ! Local variables
7165 real(dp) :: rho, ctheta, stheta, cphi, sphi
7166 integer :: m, n
7167 ! Pointers for temporary values of harmonics
7168 real(dp), pointer :: tmp_l(:), tmp_l2(:), vcos(:), vsin(:)
7169 ! Covert Cartesian coordinates into spherical
7170 call carttosph(c, rho, ctheta, stheta, cphi, sphi)
7171 ! If no need for rotations, just do translation along z
7172 if (stheta .eq. zero) then
7173 ! Workspace here is 2*(p+1)
7174 call fmm_l2l_ztranslate_work(c(3), src_r, dst_r, p, vscales, vfact, &
7175 & alpha, src_l, beta, dst_l, work)
7176 return
7177 end if
7178 ! Prepare pointers
7179 m = (p+1)**2
7180 n = 4*m + 5*p ! 4*p*p + 13*p + 4
7181 tmp_l(1:m) => work(n+1:n+m) ! 5*p*p + 15*p + 5
7182 n = n + m
7183 tmp_l2(1:m) => work(n+1:n+m) ! 6*p*p + 17*p + 6
7184 n = n + m
7185 m = p + 1
7186 vcos => work(n+1:n+m) ! 6*p*p + 18*p + 7
7187 n = n + m
7188 vsin => work(n+1:n+m) ! 6*p*p + 19*p + 8
7189 ! Compute arrays of cos and sin that are needed for rotations of harmonics
7190 call trgev(cphi, sphi, p, vcos, vsin)
7191 ! Rotate around OZ axis (work array might appear in the future)
7192 call fmm_sph_rotate_oz_adj_work(p, vcos, vsin, alpha, src_l, zero, tmp_l)
7193 ! Perform rotation in the OXZ plane, work size is 4*p*p+13*p+4
7194 call fmm_sph_rotate_oxz_work(p, ctheta, -stheta, one, tmp_l, zero, &
7195 & tmp_l2, work)
7196 ! OZ translation, workspace here is 2*(p+1)
7197 call fmm_l2l_ztranslate_work(rho, src_r, dst_r, p, vscales, vfact, one, &
7198 & tmp_l2, zero, tmp_l, work)
7199 ! Backward rotation in the OXZ plane, work size is 4*p*p+13*p+4
7200 call fmm_sph_rotate_oxz_work(p, ctheta, stheta, one, tmp_l, zero, tmp_l2, &
7201 & work)
7202 ! Backward rotation around OZ axis (work array might appear in the future)
7203 call fmm_sph_rotate_oz_work(p, vcos, vsin, one, tmp_l2, beta, dst_l)
7204end subroutine fmm_l2l_rotation_work
7205
7230subroutine fmm_l2l_bessel_rotation(c, src_r, dst_r, kappa, p, vscales, alpha, &
7231 & src_l, beta, dst_l)
7232 use complex_bessel
7233 ! Inputs
7234 integer, intent(in) :: p
7235 real(dp), intent(in) :: c(3), src_r, dst_r, vscales((p+1)*(p+1)), &
7236 & alpha, src_l((p+1)*(p+1)), beta, kappa
7237 ! Output
7238 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
7239 ! Temporary workspace
7240 real(dp) :: work(6*p*p + 19*p + 8), src_si(p+1), dst_si(p+1), s1, s2
7241 complex(dp) :: work_complex(2*p+1), z1, z2
7242 integer :: NZ, ierr
7243 ! Compute Bessel functions
7244 z1 = src_r * kappa
7245 z2 = dst_r * kappa
7246 s1 = sqrt(pi/(2*real(z1)))
7247 s2 = sqrt(pi/(2*real(z2)))
7248 call cbesi(z1, pt5, 1, 1, work_complex(1), nz, ierr)
7249 src_si(1) = s1 * real(work_complex(1))
7250 call cbesi(z2, pt5, 1, 1, work_complex(1), nz, ierr)
7251 dst_si(1) = s2 * real(work_complex(1))
7252 if (p .gt. 0) then
7253 call cbesi(z1, 1.5d0, 1, p, work_complex(2:p+1), nz, ierr)
7254 src_si(2:p+1) = s1 * real(work_complex(2:p+1))
7255 call cbesi(z2, 1.5d0, 1, p, work_complex(2:p+1), nz, ierr)
7256 dst_si(2:p+1) = s2 * real(work_complex(2:p+1))
7257 end if
7258 ! Call corresponding work routine
7259 call fmm_l2l_bessel_rotation_work(c*kappa, src_si, dst_si, p, vscales, alpha, &
7260 & src_l, beta, dst_l, work, work_complex)
7261end subroutine fmm_l2l_bessel_rotation
7262
7288subroutine fmm_l2l_bessel_rotation_work(c, src_si, dst_si, p, vscales, alpha, &
7289 & src_l, beta, dst_l, work, work_complex)
7290 ! Inputs
7291 integer, intent(in) :: p
7292 real(dp), intent(in) :: c(3), src_si(p+1), dst_si(p+1), vscales((p+1)*(p+1)), &
7293 & alpha, src_l((p+1)*(p+1)), beta
7294 ! Output
7295 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
7296 ! Temporary workspace
7297 real(dp), intent(out), target :: work(6*p*p + 19*p + 8)
7298 complex(dp), intent(out) :: work_complex(2*p+1)
7299 ! Local variables
7300 real(dp) :: rho, ctheta, stheta, cphi, sphi
7301 integer :: m, n
7302 ! Pointers for temporary values of harmonics
7303 real(dp), pointer :: tmp_l(:), tmp_l2(:), vcos(:), vsin(:)
7304 ! Convert Cartesian coordinates into spherical
7305 call carttosph(c, rho, ctheta, stheta, cphi, sphi)
7306 ! If no need for rotations, just do translation along z
7307 !if (stheta .eq. zero) then
7308 ! call fmm_l2l_bessel_ztranslate_work(abs(c(3)), src_si, dst_si, p, &
7309 ! & vscales, one, src_l, beta, dst_l, work, work_complex)
7310 ! return
7311 !end if
7312 ! Prepare pointers
7313 m = (p+1)**2
7314 n = 4*m + 5*p ! 4*p*p + 13*p + 4
7315 tmp_l(1:m) => work(n+1:n+m) ! 5*p*p + 15*p + 5
7316 n = n + m
7317 tmp_l2(1:m) => work(n+1:n+m) ! 6*p*p + 17*p + 6
7318 n = n + m
7319 m = p + 1
7320 vcos => work(n+1:n+m) ! 6*p*p + 18*p + 7
7321 n = n + m
7322 vsin => work(n+1:n+m) ! 6*p*p + 19*p + 8
7323 ! Compute arrays of cos and sin that are needed for rotations of harmonics
7324 call trgev(cphi, sphi, p, vcos, vsin)
7325 ! Rotate around OZ axis (work array might appear in the future)
7326 call fmm_sph_rotate_oz_adj_work(p, vcos, vsin, alpha, src_l, zero, tmp_l)
7327 ! Perform rotation in the OXZ plane, work size is 4*p*p+13*p+4
7328 call fmm_sph_rotate_oxz_work(p, ctheta, -stheta, one, tmp_l, zero, &
7329 & tmp_l2, work)
7330 ! OZ translation, workspace here is 2*(p+1)
7331 call fmm_l2l_bessel_ztranslate_work(rho, src_si, dst_si, p, vscales, &
7332 & one, tmp_l2, zero, tmp_l, work, work_complex)
7333 ! Backward rotation in the OXZ plane, work size is 4*p*p+13*p+4
7334 call fmm_sph_rotate_oxz_work(p, ctheta, stheta, one, tmp_l, zero, tmp_l2, &
7335 & work)
7336 ! Backward rotation around OZ axis (work array might appear in the future)
7337 call fmm_sph_rotate_oz_work(p, vcos, vsin, one, tmp_l2, beta, dst_l)
7338end subroutine fmm_l2l_bessel_rotation_work
7339
7364subroutine fmm_l2l_bessel_rotation_adj(c, src_r, dst_r, kappa, p, vscales, alpha, &
7365 & src_l, beta, dst_l)
7366 use complex_bessel
7367 ! Inputs
7368 integer, intent(in) :: p
7369 real(dp), intent(in) :: c(3), src_r, dst_r, vscales((p+1)*(p+1)), &
7370 & alpha, src_l((p+1)*(p+1)), beta, kappa
7371 ! Output
7372 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
7373 ! Temporary workspace
7374 real(dp) :: work(6*p*p + 19*p + 8), src_si(p+1), dst_si(p+1), s1, s2
7375 complex(dp) :: work_complex(2*p+1), z1, z2
7376 integer :: NZ, ierr
7377 ! Compute Bessel functions
7378 z1 = src_r * kappa
7379 z2 = dst_r * kappa
7380 s1 = sqrt(pi/(2*real(z1)))
7381 s2 = sqrt(pi/(2*real(z2)))
7382 call cbesi(z1, pt5, 1, 1, work_complex(1), nz, ierr)
7383 src_si(1) = s1 * real(work_complex(1))
7384 call cbesi(z2, pt5, 1, 1, work_complex(1), nz, ierr)
7385 dst_si(1) = s2 * real(work_complex(1))
7386 if (p .gt. 0) then
7387 call cbesi(z1, 1.5d0, 1, p, work_complex(2:p+1), nz, ierr)
7388 src_si(2:p+1) = s1 * real(work_complex(2:p+1))
7389 call cbesi(z2, 1.5d0, 1, p, work_complex(2:p+1), nz, ierr)
7390 dst_si(2:p+1) = s2 * real(work_complex(2:p+1))
7391 end if
7392 ! Call corresponding work routine
7393 call fmm_l2l_bessel_rotation_adj_work(-c*kappa, dst_si, src_si, p, vscales, alpha, &
7394 & src_l, beta, dst_l, work, work_complex)
7395end subroutine fmm_l2l_bessel_rotation_adj
7396
7422subroutine fmm_l2l_bessel_rotation_adj_work(c, src_si, dst_si, p, vscales, &
7423 & alpha, src_l, beta, dst_l, work, work_complex)
7424 ! Inputs
7425 integer, intent(in) :: p
7426 real(dp), intent(in) :: c(3), src_si(p+1), dst_si(p+1), vscales((p+1)*(p+1)), &
7427 & alpha, src_l((p+1)*(p+1)), beta
7428 ! Output
7429 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
7430 ! Temporary workspace
7431 real(dp), intent(out), target :: work(6*p*p + 19*p + 8)
7432 complex(dp), intent(out) :: work_complex(2*p+1)
7433 ! Local variables
7434 real(dp) :: rho, ctheta, stheta, cphi, sphi
7435 integer :: m, n
7436 ! Pointers for temporary values of harmonics
7437 real(dp), pointer :: tmp_l(:), tmp_l2(:), vcos(:), vsin(:)
7438 ! Convert Cartesian coordinates into spherical
7439 call carttosph(c, rho, ctheta, stheta, cphi, sphi)
7440 ! If no need for rotations, just do translation along z
7441 !if (stheta .eq. zero) then
7442 ! ! Workspace here is 2*(p+1)
7443 ! call fmm_l2l_bessel_ztranslate_adj_work(abs(c(3)), dst_si, src_si, p, &
7444 ! & vscales, one, src_l, beta, dst_l, work, work_complex)
7445 ! return
7446 !end if
7447 ! Prepare pointers
7448 m = (p+1)**2
7449 n = 4*m + 5*p ! 4*p*p + 13*p + 4
7450 tmp_l(1:m) => work(n+1:n+m) ! 5*p*p + 15*p + 5
7451 n = n + m
7452 tmp_l2(1:m) => work(n+1:n+m) ! 6*p*p + 17*p + 6
7453 n = n + m
7454 m = p + 1
7455 vcos => work(n+1:n+m) ! 6*p*p + 18*p + 7
7456 n = n + m
7457 vsin => work(n+1:n+m) ! 6*p*p + 19*p + 8
7458 ! Compute arrays of cos and sin that are needed for rotations of harmonics
7459 call trgev(cphi, sphi, p, vcos, vsin)
7460 ! Rotate around OZ axis (work array might appear in the future)
7461 call fmm_sph_rotate_oz_adj_work(p, vcos, vsin, alpha, src_l, zero, tmp_l)
7462 ! Perform rotation in the OXZ plane, work size is 4*p*p+13*p+4
7463 call fmm_sph_rotate_oxz_work(p, ctheta, -stheta, one, tmp_l, zero, &
7464 & tmp_l2, work)
7465 ! OZ translation, workspace here is 2*(p+1)
7466 call fmm_l2l_bessel_ztranslate_adj_work(rho, dst_si, src_si, p, vscales, &
7467 & one, tmp_l2, zero, tmp_l, work, work_complex)
7468 ! Backward rotation in the OXZ plane, work size is 4*p*p+13*p+4
7469 call fmm_sph_rotate_oxz_work(p, ctheta, stheta, one, tmp_l, zero, tmp_l2, &
7470 & work)
7471 ! Backward rotation around OZ axis (work array might appear in the future)
7472 call fmm_sph_rotate_oz_work(p, vcos, vsin, one, tmp_l2, beta, dst_l)
7474
7475subroutine fmm_l2l_bessel_grad(p, sph_si, vscales, sph_l, sph_l_grad)
7476 !! Inputs
7477 integer, intent(in) :: p
7478 real(dp), intent(in) :: sph_si(p+2), vscales((p+2)**2), sph_l((p+1)**2)
7479 !! Output
7480 real(dp), intent(out) :: sph_l_grad((p+2)**2, 3)
7481 !! Local variables
7482 real(dp), dimension(3, 3) :: zx_coord_transform, zy_coord_transform
7483 real(dp) :: tmp((p+1)**2), tmp2((p+2)**2)
7484 ! Set coordinate transformations
7485 zx_coord_transform = zero
7486 zx_coord_transform(3, 2) = one
7487 zx_coord_transform(2, 3) = one
7488 zx_coord_transform(1, 1) = one
7489 zy_coord_transform = zero
7490 zy_coord_transform(1, 2) = one
7491 zy_coord_transform(2, 1) = one
7492 zy_coord_transform(3, 3) = one
7493 ! Transform input harmonics for OX axis
7494 call fmm_sph_transform(p, zx_coord_transform, one, sph_l, zero, tmp)
7495 ! Differentiate L2L along OZ
7496 call fmm_l2l_bessel_derivative_ztranslate_work(sph_si, p, vscales, &
7497 & one, tmp, zero, tmp2)
7498 ! Transform into output harmonics for OX axis
7499 call fmm_sph_transform(p+1, zx_coord_transform, one, tmp2, zero, &
7500 & sph_l_grad(:, 1))
7501 ! Transform input harmonics for OY axis
7502 call fmm_sph_transform(p, zy_coord_transform, one, sph_l, zero, tmp)
7503 ! Differentiate M2M along OZ
7504 call fmm_l2l_bessel_derivative_ztranslate_work(sph_si, p, vscales, &
7505 & one, tmp, zero, tmp2)
7506 ! Transform into output harmonics for OY axis
7507 call fmm_sph_transform(p+1, zy_coord_transform, one, tmp2, zero, &
7508 & sph_l_grad(:, 2))
7509 ! Differentiate for OZ axis
7510 call fmm_l2l_bessel_derivative_ztranslate_work(sph_si, p, vscales, &
7511 & one, sph_l, zero, sph_l_grad(:, 3))
7512end subroutine fmm_l2l_bessel_grad
7513
7539subroutine fmm_l2l_rotation_adj(c, src_r, dst_r, p, vscales, vfact, &
7540 & alpha, src_l, beta, dst_l)
7541 ! Inputs
7542 integer, intent(in) :: p
7543 real(dp), intent(in) :: c(3), src_r, dst_r, vscales((p+1)*(p+1)), &
7544 & vfact(2*p+1), alpha, src_l((p+1)*(p+1)), beta
7545 ! Output
7546 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
7547 ! Temporary workspace
7548 real(dp) :: work(6*p*p + 19*p + 8)
7549 ! Call corresponding work routine
7550 call fmm_l2l_rotation_adj_work(c, src_r, dst_r, p, vscales, vfact, alpha, &
7551 & src_l, beta, dst_l, work)
7552end subroutine fmm_l2l_rotation_adj
7553
7580subroutine fmm_l2l_rotation_adj_work(c, src_r, dst_r, p, vscales, vfact, &
7581 & alpha, src_l, beta, dst_l, work)
7582 ! Inputs
7583 integer, intent(in) :: p
7584 real(dp), intent(in) :: c(3), src_r, dst_r, vscales((p+1)*(p+1)), &
7585 & vfact(2*p+1), alpha, src_l((p+1)*(p+1)), beta
7586 ! Output
7587 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
7588 ! Temporary workspace
7589 real(dp), intent(out), target :: work(6*p*p + 19*p + 8)
7590 ! Local variables
7591 real(dp) :: rho, ctheta, stheta, cphi, sphi
7592 integer :: m, n
7593 ! Pointers for temporary values of harmonics
7594 real(dp), pointer :: tmp_l(:), tmp_l2(:), vcos(:), vsin(:)
7595 ! Covert Cartesian coordinates into spherical
7596 call carttosph(c, rho, ctheta, stheta, cphi, sphi)
7597 ! If no need for rotations, just do translation along z
7598 if (stheta .eq. zero) then
7599 ! Workspace here is 2*(p+1)
7600 call fmm_l2l_ztranslate_adj_work(c(3), src_r, dst_r, p, vscales, &
7601 & vfact, alpha, src_l, beta, dst_l, work)
7602 return
7603 end if
7604 ! Prepare pointers
7605 m = (p+1)**2
7606 n = 4*m + 5*p ! 4*p*p + 13*p + 4
7607 tmp_l(1:m) => work(n+1:n+m) ! 5*p*p + 15*p + 5
7608 n = n + m
7609 tmp_l2(1:m) => work(n+1:n+m) ! 6*p*p + 17*p + 6
7610 n = n + m
7611 m = p + 1
7612 vcos => work(n+1:n+m) ! 6*p*p + 18*p + 7
7613 n = n + m
7614 vsin => work(n+1:n+m) ! 6*p*p + 19*p + 8
7615 ! Compute arrays of cos and sin that are needed for rotations of harmonics
7616 call trgev(cphi, sphi, p, vcos, vsin)
7617 ! Rotate around OZ axis (work array might appear in the future)
7618 call fmm_sph_rotate_oz_adj_work(p, vcos, vsin, alpha, src_l, zero, tmp_l)
7619 ! Perform rotation in the OXZ plane, work size is 4*p*p+13*p+4
7620 call fmm_sph_rotate_oxz_work(p, ctheta, -stheta, one, tmp_l, zero, &
7621 & tmp_l2, work)
7622 ! OZ translation, workspace here is 2*(p+1)
7623 call fmm_l2l_ztranslate_adj_work(rho, src_r, dst_r, p, vscales, vfact, &
7624 & one, tmp_l2, zero, tmp_l, work)
7625 ! Backward rotation in the OXZ plane, work size is 4*p*p+13*p+4
7626 call fmm_sph_rotate_oxz_work(p, ctheta, stheta, one, tmp_l, zero, tmp_l2, &
7627 & work)
7628 ! Backward rotation around OZ axis (work array might appear in the future)
7629 call fmm_sph_rotate_oz_work(p, vcos, vsin, one, tmp_l2, beta, dst_l)
7630end subroutine fmm_l2l_rotation_adj_work
7631
7655subroutine fmm_m2l_ztranslate(z, src_r, dst_r, pm, pl, vscales, &
7656 & m2l_ztranslate_coef, alpha, src_m, beta, dst_l)
7657 ! Inputs
7658 integer, intent(in) :: pm, pl
7659 real(dp), intent(in) :: z, src_r, dst_r, vscales((pm+pl+1)*(pm+pl+1)), &
7660 & m2l_ztranslate_coef(pm+1, pl+1, pl+1), alpha, src_m((pm+1)*(pm+1)), &
7661 & beta
7662 ! Output
7663 real(dp), intent(inout) :: dst_l((pl+1)*(pl+1))
7664 ! Temporary workspace
7665 real(dp) :: work((pm+2)*(pm+1))
7666 ! Call corresponding work routine
7667 call fmm_m2l_ztranslate_work(z, src_r, dst_r, pm, pl, vscales, &
7668 & m2l_ztranslate_coef, alpha, src_m, beta, dst_l, work)
7669end subroutine fmm_m2l_ztranslate
7670
7695subroutine fmm_m2l_ztranslate_work(z, src_r, dst_r, pm, pl, vscales, &
7696 & m2l_ztranslate_coef, alpha, src_m, beta, dst_l, work)
7697 ! Inputs
7698 integer, intent(in) :: pm, pl
7699 real(dp), intent(in) :: z, src_r, dst_r, vscales((pm+pl+1)*(pm+pl+1)), &
7700 & m2l_ztranslate_coef(pm+1, pl+1, pl+1), alpha, src_m((pm+1)*(pm+1)), &
7701 & beta
7702 ! Output
7703 real(dp), intent(inout) :: dst_l((pl+1)*(pl+1))
7704 ! Temporary workspace
7705 real(dp), intent(out), target :: work((pm+2)*(pm+1))
7706 ! Local variables
7707 real(dp) :: tmp1, r1, r2, res1, res2, pow_r2
7708 integer :: j, k, n, indj, indk1, indk2
7709 ! Pointers for temporary values of powers
7710 real(dp), pointer :: src_m2(:), pow_r1(:)
7711 ! In case alpha is zero just do a proper scaling of output
7712 if (alpha .eq. zero) then
7713 if (beta .eq. zero) then
7714 dst_l = zero
7715 else
7716 dst_l = beta * dst_l
7717 end if
7718 return
7719 end if
7720 ! Now alpha is non-zero
7721 ! z cannot be zero, as input sphere (multipole) must not intersect with
7722 ! output sphere (local)
7723 if (z .eq. zero) then
7724 return
7725 end if
7726 ! Prepare pointers
7727 n = (pm+1) ** 2
7728 src_m2(1:n) => work(1:n)
7729 pow_r1(1:pm+1) => work(n+1:n+pm+1)
7730 ! Get powers of r1 and r2
7731 r1 = src_r / z
7732 r2 = dst_r / z
7733 ! This abs(r1) makes it possible to work with negative z to avoid
7734 ! unnecessary rotation to positive z
7735 tmp1 = abs(r1)
7736 do j = 0, pm
7737 indj = j*j + j + 1
7738 pow_r1(j+1) = tmp1 / vscales(indj)
7739 tmp1 = tmp1 * r1
7740 end do
7741 pow_r2 = one
7742 ! Reorder source harmonics from (degree, order) to (order, degree)
7743 ! Zero order k=0 at first
7744 do j = 0, pm
7745 indj = j*j + j + 1
7746 src_m2(j+1) = pow_r1(j+1) * src_m(indj)
7747 end do
7748 ! Non-zero orders next, a positive k followed by a negative -k
7749 indk1 = pm + 2
7750 do k = 1, pm
7751 n = pm - k + 1
7752 indk2 = indk1 + n
7753 !!GCC$ unroll 4
7754 do j = k, pm
7755 indj = j*j + j + 1
7756 src_m2(indk1+j-k) = pow_r1(j+1) * src_m(indj+k)
7757 src_m2(indk2+j-k) = pow_r1(j+1) * src_m(indj-k)
7758 end do
7759 indk1 = indk2 + n
7760 end do
7761 ! Do actual M2L
7762 ! Overwrite output if beta is zero
7763 if (beta .eq. zero) then
7764 do j = 0, pl
7765 ! Offset for dst_l
7766 indj = j*j + j + 1
7767 ! k = 0
7768 tmp1 = alpha * vscales(indj) * pow_r2
7769 pow_r2 = pow_r2 * r2
7770 res1 = zero
7771 !!GCC$ unroll 4
7772 do n = 0, pm
7773 res1 = res1 + m2l_ztranslate_coef(n+1, 1, j+1)*src_m2(n+1)
7774 end do
7775 dst_l(indj) = tmp1 * res1
7776 ! k != 0
7777 !!GCC$ unroll 4
7778 do k = 1, j
7779 ! Offsets for src_m2
7780 indk1 = pm + 2 + (2*pm-k+2)*(k-1)
7781 indk2 = indk1 + pm - k + 1
7782 res1 = zero
7783 res2 = zero
7784 !!GCC$ unroll 4
7785 do n = k, pm
7786 res1 = res1 + &
7787 & m2l_ztranslate_coef(n-k+1, k+1, j-k+1)* &
7788 & src_m2(indk1+n-k)
7789 res2 = res2 + &
7790 & m2l_ztranslate_coef(n-k+1, k+1, j-k+1)* &
7791 & src_m2(indk2+n-k)
7792 end do
7793 dst_l(indj+k) = tmp1 * res1
7794 dst_l(indj-k) = tmp1 * res2
7795 end do
7796 end do
7797 else
7798 do j = 0, pl
7799 ! Offset for dst_l
7800 indj = j*j + j + 1
7801 ! k = 0
7802 tmp1 = alpha * vscales(indj) * pow_r2
7803 pow_r2 = pow_r2 * r2
7804 res1 = zero
7805 do n = 0, pm
7806 res1 = res1 + m2l_ztranslate_coef(n+1, 1, j+1)*src_m2(n+1)
7807 end do
7808 dst_l(indj) = beta*dst_l(indj) + tmp1*res1
7809 ! k != 0
7810 do k = 1, j
7811 ! Offsets for src_m2
7812 indk1 = pm + 2 + (2*pm-k+2)*(k-1)
7813 indk2 = indk1 + pm - k + 1
7814 res1 = zero
7815 res2 = zero
7816 do n = k, pm
7817 res1 = res1 + &
7818 & m2l_ztranslate_coef(n-k+1, k+1, j-k+1)* &
7819 & src_m2(indk1+n-k)
7820 res2 = res2 + &
7821 & m2l_ztranslate_coef(n-k+1, k+1, j-k+1)* &
7822 & src_m2(indk2+n-k)
7823 end do
7824 dst_l(indj+k) = beta*dst_l(indj+k) + tmp1*res1
7825 dst_l(indj-k) = beta*dst_l(indj-k) + tmp1*res2
7826 end do
7827 end do
7828 end if
7829end subroutine fmm_m2l_ztranslate_work
7830
7854subroutine fmm_m2l_ztranslate_adj(z, src_r, dst_r, pl, pm, vscales, &
7855 & m2l_ztranslate_adj_coef, alpha, src_l, beta, dst_m)
7856 ! Inputs
7857 integer, intent(in) :: pl, pm
7858 real(dp), intent(in) :: z, src_r, dst_r, vscales((pm+pl+1)*(pm+pl+1)), &
7859 & m2l_ztranslate_adj_coef(pl+1, pl+1, pm+1), alpha, &
7860 & src_l((pl+1)*(pl+1)), beta
7861 ! Output
7862 real(dp), intent(inout) :: dst_m((pm+1)*(pm+1))
7863 ! Temporary workspace
7864 real(dp) :: work((pl+2)*(pl+1))
7865 ! Call corresponding work routine
7866 call fmm_m2l_ztranslate_adj_work(z, src_r, dst_r, pl, pm, vscales, &
7867 & m2l_ztranslate_adj_coef, alpha, src_l, beta, dst_m, work)
7868end subroutine fmm_m2l_ztranslate_adj
7869
7893subroutine fmm_m2l_ztranslate_adj_work(z, src_r, dst_r, pl, pm, vscales, &
7894 & m2l_ztranslate_adj_coef, alpha, src_l, beta, dst_m, work)
7895 ! Inputs
7896 integer, intent(in) :: pl, pm
7897 real(dp), intent(in) :: z, src_r, dst_r, vscales((pm+pl+1)*(pm+pl+1)), &
7898 & m2l_ztranslate_adj_coef(pl+1, pl+1, pm+1), alpha, &
7899 & src_l((pl+1)*(pl+1)), beta
7900 ! Output
7901 real(dp), intent(inout) :: dst_m((pm+1)*(pm+1))
7902 ! Temporary workspace
7903 real(dp), intent(out), target :: work((pl+2)*(pl+1))
7904 ! Local variables
7905 real(dp) :: tmp1, r1, r2, pow_r1, res1, res2
7906 integer :: j, k, n, indj, indn, indk1, indk2
7907 ! Pointers for temporary values of powers
7908 real(dp), pointer :: pow_r2(:), src_l2(:)
7909 ! In case alpha is zero just do a proper scaling of output
7910 if (alpha .eq. zero) then
7911 if (beta .eq. zero) then
7912 dst_m = zero
7913 else
7914 dst_m = beta * dst_m
7915 end if
7916 return
7917 end if
7918 ! Now alpha is non-zero
7919 ! z cannot be zero, as input sphere (multipole) must not intersect with
7920 ! output sphere (local)
7921 if (z .eq. zero) then
7922 return
7923 end if
7924 ! Prepare pointers
7925 n = (pl+1) ** 2
7926 src_l2(1:n) => work(1:n)
7927 pow_r2(1:pl+1) => work(n+1:n+pl+1)
7928 ! Get powers of r1 and r2
7929 r1 = -dst_r / z
7930 r2 = -src_r / z
7931 ! This abs(r1) makes it possible to work with negative z to avoid
7932 ! unnecessary rotation to positive z
7933 tmp1 = one
7934 do j = 0, pl
7935 indj = j*j + j + 1
7936 pow_r2(j+1) = tmp1 * vscales(indj)
7937 tmp1 = tmp1 * r2
7938 end do
7939 pow_r1 = abs(r1)
7940 ! Reorder source harmonics from (degree, order) to (order, degree)
7941 ! Zero order k=0 at first
7942 do j = 0, pl
7943 indj = j*j + j + 1
7944 src_l2(j+1) = pow_r2(j+1) * src_l(indj)
7945 end do
7946 ! Non-zero orders next, a positive k followed by a negative -k
7947 indk1 = pl + 2
7948 do k = 1, pl
7949 n = pl - k + 1
7950 indk2 = indk1 + n
7951 do j = k, pl
7952 indj = j*j + j + 1
7953 src_l2(indk1+j-k) = pow_r2(j+1) * src_l(indj+k)
7954 src_l2(indk2+j-k) = pow_r2(j+1) * src_l(indj-k)
7955 end do
7956 indk1 = indk2 + n
7957 end do
7958 ! Do actual adjoint M2L
7959 ! Overwrite output if beta is zero
7960 if (beta .eq. zero) then
7961 do n = 0, pm
7962 indn = n*n + n + 1
7963 ! k = 0
7964 tmp1 = alpha * pow_r1 / vscales(indn)
7965 pow_r1 = pow_r1 * r1
7966 res1 = zero
7967 do j = 0, pl
7968 indj = j*j + j + 1
7969 res1 = res1 + m2l_ztranslate_adj_coef(j+1, 1, n+1)*src_l2(j+1)
7970 end do
7971 dst_m(indn) = tmp1 * res1
7972 ! k != 0
7973 do k = 1, n
7974 ! Offsets for src_l2
7975 indk1 = pl + 2 + (2*pl-k+2)*(k-1)
7976 indk2 = indk1 + pl - k + 1
7977 res1 = zero
7978 res2 = zero
7979 do j = k, pl
7980 indj = j*j + j + 1
7981 res1 = res1 + &
7982 & m2l_ztranslate_adj_coef(j-k+1, k+1, n-k+1)* &
7983 & src_l2(indk1+j-k)
7984 res2 = res2 + &
7985 & m2l_ztranslate_adj_coef(j-k+1, k+1, n-k+1)* &
7986 & src_l2(indk2+j-k)
7987 end do
7988 dst_m(indn+k) = tmp1 * res1
7989 dst_m(indn-k) = tmp1 * res2
7990 end do
7991 end do
7992 ! Update output if beta is non-zero
7993 else
7994 do n = 0, pm
7995 indn = n*n + n + 1
7996 ! k = 0
7997 tmp1 = alpha * pow_r1 / vscales(indn)
7998 pow_r1 = pow_r1 * r1
7999 res1 = zero
8000 do j = 0, pl
8001 indj = j*j + j + 1
8002 res1 = res1 + m2l_ztranslate_adj_coef(j+1, 1, n+1)*src_l2(j+1)
8003 end do
8004 dst_m(indn) = beta*dst_m(indn) + tmp1*res1
8005 ! k != 0
8006 do k = 1, n
8007 ! Offsets for src_l2
8008 indk1 = pl + 2 + (2*pl-k+2)*(k-1)
8009 indk2 = indk1 + pl - k + 1
8010 res1 = zero
8011 res2 = zero
8012 do j = k, pl
8013 indj = j*j + j + 1
8014 res1 = res1 + &
8015 & m2l_ztranslate_adj_coef(j-k+1, k+1, n-k+1)* &
8016 & src_l2(indk1+j-k)
8017 res2 = res2 + &
8018 & m2l_ztranslate_adj_coef(j-k+1, k+1, n-k+1)* &
8019 & src_l2(indk2+j-k)
8020 end do
8021 dst_m(indn+k) = beta*dst_m(indn+k) + tmp1*res1
8022 dst_m(indn-k) = beta*dst_m(indn-k) + tmp1*res2
8023 end do
8024 end do
8025 end if
8026end subroutine fmm_m2l_ztranslate_adj_work
8027
8054subroutine fmm_m2l_rotation(c, src_r, dst_r, pm, pl, vscales, &
8055 & m2l_ztranslate_coef, alpha, src_m, beta, dst_l)
8056 ! Inputs
8057 integer, intent(in) :: pm, pl
8058 real(dp), intent(in) :: c(3), src_r, dst_r, vscales((pm+pl+1)**2), &
8059 & m2l_ztranslate_coef(pm+1, pl+1, pl+1), alpha, src_m((pm+1)*(pm+1)), &
8060 & beta
8061 ! Output
8062 real(dp), intent(inout) :: dst_l((pl+1)*(pl+1))
8063 ! Temporary workspace
8064 real(dp) :: work(6*max(pm, pl)**2 + 19*max(pm, pl) + 8)
8065 ! Call corresponding work routine
8066 call fmm_m2l_rotation_work(c, src_r, dst_r, pm, pl, vscales, &
8067 & m2l_ztranslate_coef, alpha, src_m, beta, dst_l, work)
8068end subroutine fmm_m2l_rotation
8069
8098subroutine fmm_m2l_rotation_work(c, src_r, dst_r, pm, pl, vscales, &
8099 & m2l_ztranslate_coef, alpha, src_m, beta, dst_l, work)
8100 ! Inputs
8101 integer, intent(in) :: pm, pl
8102 real(dp), intent(in) :: c(3), src_r, dst_r, vscales((pm+pl+1)**2), &
8103 & m2l_ztranslate_coef(pm+1, pl+1, pl+1), alpha, src_m((pm+1)*(pm+1)), &
8104 & beta
8105 ! Output
8106 real(dp), intent(inout) :: dst_l((pl+1)*(pl+1))
8107 ! Temporary workspace
8108 real(dp), intent(out), target :: &
8109 & work(6*max(pm, pl)**2 + 19*max(pm, pl) + 8)
8110 ! Local variables
8111 real(dp) :: rho, ctheta, stheta, cphi, sphi
8112 integer :: m, n, p
8113 ! Pointers for temporary values of harmonics
8114 real(dp), pointer :: tmp_ml(:), tmp_ml2(:), vcos(:), vsin(:)
8115 ! Covert Cartesian coordinates into spherical
8116 call carttosph(c, rho, ctheta, stheta, cphi, sphi)
8117 ! If no need for rotations, just do translation along z
8118 if (stheta .eq. zero) then
8119 ! Workspace here is (pm+2)*(pm+1)
8120 call fmm_m2l_ztranslate_work(c(3), src_r, dst_r, pm, pl, vscales, &
8121 & m2l_ztranslate_coef, alpha, src_m, beta, dst_l, work)
8122 return
8123 end if
8124 ! Prepare pointers
8125 p = max(pm, pl)
8126 m = (p+1)**2
8127 n = 4*m + 5*p ! 4*p*p + 13*p + 4
8128 tmp_ml(1:m) => work(n+1:n+m) ! 5*p*p + 15*p + 5
8129 n = n + m
8130 tmp_ml2(1:m) => work(n+1:n+m) ! 6*p*p + 17*p + 6
8131 n = n + m
8132 m = p + 1
8133 vcos => work(n+1:n+m) ! 6*p*p + 18*p + 7
8134 n = n + m
8135 vsin => work(n+1:n+m) ! 6*p*p + 19*p + 8
8136 ! Compute arrays of cos and sin that are needed for rotations of harmonics
8137 call trgev(cphi, sphi, p, vcos, vsin)
8138 ! Rotate around OZ axis (work array might appear in the future)
8139 call fmm_sph_rotate_oz_adj_work(pm, vcos, vsin, alpha, src_m, zero, tmp_ml)
8140 ! Perform rotation in the OXZ plane, work size is 4*pm*pm+13*pm+4
8141 call fmm_sph_rotate_oxz_work(pm, ctheta, -stheta, one, tmp_ml, zero, &
8142 & tmp_ml2, work)
8143 ! OZ translation, workspace here is (pm+2)*(pm+1)
8144 call fmm_m2l_ztranslate_work(rho, src_r, dst_r, pm, pl, vscales, &
8145 & m2l_ztranslate_coef, one, tmp_ml2, zero, tmp_ml, work)
8146 ! Backward rotation in the OXZ plane, work size is 4*pl*pl+13*pl+4
8147 call fmm_sph_rotate_oxz_work(pl, ctheta, stheta, one, tmp_ml, zero, &
8148 & tmp_ml2, work)
8149 ! Backward rotation around OZ axis (work array might appear in the future)
8150 call fmm_sph_rotate_oz_work(pl, vcos, vsin, one, tmp_ml2, beta, dst_l)
8151end subroutine fmm_m2l_rotation_work
8152
8177subroutine fmm_m2l_bessel_rotation(c, src_r, dst_r, kappa, p, vscales, alpha, &
8178 & src_m, beta, dst_l)
8179 use complex_bessel
8180 ! Inputs
8181 integer, intent(in) :: p
8182 real(dp), intent(in) :: c(3), src_r, dst_r, vscales((p+1)*(p+1)), &
8183 & alpha, src_m((p+1)*(p+1)), beta, kappa
8184 ! Output
8185 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
8186 ! Temporary workspace
8187 real(dp) :: work(6*p*p + 19*p + 8), src_sk(p+1), dst_si(p+1), s1, s2
8188 complex(dp) :: work_complex(2*p+1), z1, z2
8189 integer :: NZ, ierr
8190 ! Compute Bessel functions
8191 z1 = src_r * kappa
8192 z2 = dst_r * kappa
8193 s1 = sqrt(2/(pi*real(z1)))
8194 s2 = sqrt(pi/(2*real(z2)))
8195 call cbesk(z1, pt5, 1, 1, work_complex(1), nz, ierr)
8196 src_sk(1) = s1 * real(work_complex(1))
8197 call cbesi(z2, pt5, 1, 1, work_complex(1), nz, ierr)
8198 dst_si(1) = s2 * real(work_complex(1))
8199 if (p .gt. 0) then
8200 call cbesk(z1, 1.5d0, 1, p, work_complex(2:p+1), nz, ierr)
8201 src_sk(2:p+1) = s1 * real(work_complex(2:p+1))
8202 call cbesi(z2, 1.5d0, 1, p, work_complex(2:p+1), nz, ierr)
8203 dst_si(2:p+1) = s2 * real(work_complex(2:p+1))
8204 end if
8205 ! Call corresponding work routine
8206 call fmm_m2l_bessel_rotation_work(c*kappa, src_sk, dst_si, p, vscales, alpha, &
8207 & src_m, beta, dst_l, work, work_complex)
8208end subroutine fmm_m2l_bessel_rotation
8209
8235subroutine fmm_m2l_bessel_rotation_work(c, src_sk, dst_si, p, vscales, alpha, &
8236 & src_m, beta, dst_l, work, work_complex)
8237 ! Inputs
8238 integer, intent(in) :: p
8239 real(dp), intent(in) :: c(3), src_sk(p+1), dst_si(p+1), vscales((p+1)*(p+1)), &
8240 & alpha, src_m((p+1)*(p+1)), beta
8241 ! Output
8242 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
8243 ! Temporary workspace
8244 real(dp), intent(out), target :: work(6*p*p + 19*p + 8)
8245 complex(dp), intent(out) :: work_complex(2*p+1)
8246 ! Local variables
8247 real(dp) :: rho, ctheta, stheta, cphi, sphi
8248 integer :: m, n
8249 ! Pointers for temporary values of harmonics
8250 real(dp), pointer :: tmp_m(:), tmp_m2(:), vcos(:), vsin(:)
8251 ! Convert Cartesian coordinates into spherical
8252 call carttosph(c, rho, ctheta, stheta, cphi, sphi)
8253 ! If no need for rotations, just do translation along z
8254 !if (stheta .eq. zero) then
8255 ! ! Workspace here is 2*(p+1)
8256 ! call fmm_m2m_bessel_ztranslate_work(c(3), src_r, dst_r, p, vscales, &
8257 ! & alpha, src_m, beta, dst_m, work)
8258 ! return
8259 !end if
8260 ! Prepare pointers
8261 m = (p+1)**2
8262 n = 4*m + 5*p ! 4*p*p + 13*p + 4
8263 tmp_m(1:m) => work(n+1:n+m) ! 5*p*p + 15*p + 5
8264 n = n + m
8265 tmp_m2(1:m) => work(n+1:n+m) ! 6*p*p + 17*p + 6
8266 n = n + m
8267 m = p + 1
8268 vcos => work(n+1:n+m) ! 6*p*p + 18*p + 7
8269 n = n + m
8270 vsin => work(n+1:n+m) ! 6*p*p + 19*p + 8
8271 ! Compute arrays of cos and sin that are needed for rotations of harmonics
8272 call trgev(cphi, sphi, p, vcos, vsin)
8273 ! Rotate around OZ axis (work array might appear in the future)
8274 call fmm_sph_rotate_oz_adj_work(p, vcos, vsin, alpha, src_m, zero, tmp_m)
8275 ! Perform rotation in the OXZ plane, work size is 4*p*p+13*p+4
8276 call fmm_sph_rotate_oxz_work(p, ctheta, -stheta, one, tmp_m, zero, &
8277 & tmp_m2, work)
8278 ! OZ translation, workspace here is 2*(p+1)
8279 call fmm_m2l_bessel_ztranslate_work(rho, src_sk, dst_si, p, vscales, one, &
8280 & tmp_m2, zero, tmp_m, work, work_complex)
8281 ! Backward rotation in the OXZ plane, work size is 4*p*p+13*p+4
8282 call fmm_sph_rotate_oxz_work(p, ctheta, stheta, one, tmp_m, zero, tmp_m2, &
8283 & work)
8284 ! Backward rotation around OZ axis (work array might appear in the future)
8285 call fmm_sph_rotate_oz_work(p, vcos, vsin, one, tmp_m2, beta, dst_l)
8286end subroutine fmm_m2l_bessel_rotation_work
8287
8312subroutine fmm_m2l_bessel_rotation_adj(c, src_r, dst_r, kappa, p, vscales, alpha, &
8313 & src_m, beta, dst_l)
8314 use complex_bessel
8315 ! Inputs
8316 integer, intent(in) :: p
8317 real(dp), intent(in) :: c(3), src_r, dst_r, vscales((p+1)*(p+1)), &
8318 & alpha, src_m((p+1)*(p+1)), beta, kappa
8319 ! Output
8320 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
8321 ! Temporary workspace
8322 real(dp) :: work(6*p*p + 19*p + 8), src_sk(p+1), dst_si(p+1), s1, s2, ck(3)
8323 complex(dp) :: work_complex(2*p+1), z1, z2
8324 integer :: NZ, ierr
8325 ! Compute Bessel functions
8326 z1 = src_r * kappa
8327 z2 = dst_r * kappa
8328 s1 = sqrt(2/(pi*real(z1)))
8329 s2 = sqrt(pi/(2*real(z2)))
8330 call cbesk(z1, pt5, 1, 1, work_complex(1), nz, ierr)
8331 src_sk(1) = s1 * real(work_complex(1))
8332 call cbesi(z2, pt5, 1, 1, work_complex(1), nz, ierr)
8333 dst_si(1) = s2 * real(work_complex(1))
8334 if (p .gt. 0) then
8335 call cbesk(z1, 1.5d0, 1, p, work_complex(2:p+1), nz, ierr)
8336 src_sk(2:p+1) = s1 * real(work_complex(2:p+1))
8337 call cbesi(z2, 1.5d0, 1, p, work_complex(2:p+1), nz, ierr)
8338 dst_si(2:p+1) = s2 * real(work_complex(2:p+1))
8339 end if
8340 ! Call corresponding work routine
8341 ck = - c*kappa
8342 call fmm_m2l_bessel_rotation_adj_work(ck, dst_si, src_sk, p, vscales, &
8343 & alpha, src_m, beta, dst_l, work, work_complex)
8344end subroutine fmm_m2l_bessel_rotation_adj
8345
8371subroutine fmm_m2l_bessel_rotation_adj_work(c, src_sk, dst_si, p, vscales, alpha, &
8372 & src_m, beta, dst_l, work, work_complex)
8373 ! Inputs
8374 integer, intent(in) :: p
8375 real(dp), intent(in) :: c(3), src_sk(p+1), dst_si(p+1), vscales((p+1)*(p+1)), &
8376 & alpha, src_m((p+1)*(p+1)), beta
8377 ! Output
8378 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
8379 ! Temporary workspace
8380 real(dp), intent(out), target :: work(6*p*p + 19*p + 8)
8381 complex(dp), intent(out) :: work_complex(2*p+1)
8382 ! Local variables
8383 real(dp) :: rho, ctheta, stheta, cphi, sphi
8384 integer :: m, n
8385 ! Pointers for temporary values of harmonics
8386 real(dp), pointer :: tmp_m(:), tmp_m2(:), vcos(:), vsin(:)
8387 ! Convert Cartesian coordinates into spherical
8388 call carttosph(c, rho, ctheta, stheta, cphi, sphi)
8389 ! If no need for rotations, just do translation along z
8390 !if (stheta .eq. zero) then
8391 ! ! Workspace here is 2*(p+1)
8392 ! call fmm_m2m_bessel_ztranslate_work(c(3), src_r, dst_r, p, vscales, &
8393 ! & alpha, src_m, beta, dst_m, work)
8394 ! return
8395 !end if
8396 ! Prepare pointers
8397 m = (p+1)**2
8398 n = 4*m + 5*p ! 4*p*p + 13*p + 4
8399 tmp_m(1:m) => work(n+1:n+m) ! 5*p*p + 15*p + 5
8400 n = n + m
8401 tmp_m2(1:m) => work(n+1:n+m) ! 6*p*p + 17*p + 6
8402 n = n + m
8403 m = p + 1
8404 vcos => work(n+1:n+m) ! 6*p*p + 18*p + 7
8405 n = n + m
8406 vsin => work(n+1:n+m) ! 6*p*p + 19*p + 8
8407 ! Compute arrays of cos and sin that are needed for rotations of harmonics
8408 call trgev(cphi, sphi, p, vcos, vsin)
8409 ! Rotate around OZ axis (work array might appear in the future)
8410 call fmm_sph_rotate_oz_adj_work(p, vcos, vsin, alpha, src_m, zero, tmp_m)
8411 ! Perform rotation in the OXZ plane, work size is 4*p*p+13*p+4
8412 call fmm_sph_rotate_oxz_work(p, ctheta, -stheta, one, tmp_m, zero, &
8413 & tmp_m2, work)
8414 ! OZ translation, workspace here is 2*(p+1)
8415 call fmm_m2l_bessel_ztranslate_adj_work(rho, src_sk, dst_si, p, vscales, one, &
8416 & tmp_m2, zero, tmp_m, work, work_complex)
8417 ! Backward rotation in the OXZ plane, work size is 4*p*p+13*p+4
8418 call fmm_sph_rotate_oxz_work(p, ctheta, stheta, one, tmp_m, zero, tmp_m2, &
8419 & work)
8420 ! Backward rotation around OZ axis (work array might appear in the future)
8421 call fmm_sph_rotate_oz_work(p, vcos, vsin, one, tmp_m2, beta, dst_l)
8423
8445subroutine fmm_m2l_bessel_ztranslate(z, src_sk, dst_si, p, vscales, alpha, &
8446 & src_m, beta, dst_l)
8447 ! Inputs
8448 integer, intent(in) :: p
8449 real(dp), intent(in) :: z, src_sk(p+1), dst_si(p+1), vscales((p+1)*(p+1)), &
8450 & alpha, src_m((p+1)*(p+1)), beta
8451 ! Output
8452 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
8453 ! Temporary workspace
8454 real(dp) :: work(2*p+1)
8455 complex(dp) :: work_complex(2*p+1)
8456 ! Call corresponding work routine
8457 call fmm_m2l_bessel_ztranslate_work(z, src_sk, dst_si, p, vscales, &
8458 & alpha, src_m, beta, dst_l, work, work_complex)
8459end subroutine fmm_m2l_bessel_ztranslate
8460
8484subroutine fmm_m2l_bessel_ztranslate_work(z, src_sk, dst_si, p, vscales, &
8485 & alpha, src_m, beta, dst_l, work, work_complex)
8486 use complex_bessel
8487 ! Inputs
8488 integer, intent(in) :: p
8489 real(dp), intent(in) :: z, src_sk(p+1), dst_si(p+1), vscales((p+1)*(p+1)), &
8490 & alpha, src_m((p+1)*(p+1)), beta
8491 ! Output
8492 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
8493 ! Temporary workspace
8494 real(dp), intent(out), target :: work(2*p+1)
8495 complex(dp), intent(out) :: work_complex(2*p+1)
8496 ! Local variables
8497 real(dp) :: r1, tmp1, tmp2, tmp3, res1, res2
8498 integer :: j, k, n, indj, indn, l, NZ, ierr
8499 ! Pointers for temporary values of powers
8500 real(dp) :: fact(2*p+1)
8501 complex(dp) :: complex_argument
8502 ! In case alpha is zero just do a proper scaling of output
8503 if (alpha .eq. zero) then
8504 if (beta .eq. zero) then
8505 dst_l = zero
8506 else
8507 dst_l = beta * dst_l
8508 end if
8509 return
8510 end if
8511 ! Get factorials
8512 fact(1) = one
8513 do j = 1, 2*p
8514 fact(j+1) = dble(j) * fact(j)
8515 end do
8516 ! Now alpha is non-zero
8517 ! If harmonics have different centers
8518 if (z .ne. 0) then
8519 complex_argument = z
8520 call cbesk(complex_argument, pt5, 1, 1, work_complex(1), nz, ierr)
8521 work(1) = sqrt(2/(pi*z)) * real(work_complex(1))
8522 if (p .gt. 0) then
8523 call cbesk(complex_argument, 1.5d0, 1, 2*p, &
8524 & work_complex(2:2*p+1), nz, ierr)
8525 work(2:2*p+1) = sqrt(2/(pi*z)) * real(work_complex(2:2*p+1))
8526 end if
8527 ! Do actual M2M
8528 ! Overwrite output if beta is zero
8529 if (beta .eq. zero) then
8530 dst_l = zero
8531 do j = 0, p
8532 ! Offset for dst_m
8533 indj = j*j + j + 1
8534 ! k = 0
8535 k = 0
8536 res1 = zero
8537 do n = 0, p
8538 ! Offset for src_m
8539 indn = n*n + n + 1
8540 tmp1 = vscales(indn) * ((-one)**(n)) * &
8541 & dble(2*j+1) * fact(j+1) * fact(n+1) / &
8542 & src_sk(n+1) / vscales(indj) * dst_si(j+1)
8543 tmp3 = zero
8544 do l = 0, min(j, n)
8545 tmp2 = ((-two)**(-l)) / &
8546 & fact(l+1)*fact(2*l+1)/ &
8547 & fact(l+1)/fact(l+1)/fact(j-l+1)/fact(n-l+1)* &
8548 & (work(j+n-l+1) / (z**l))
8549 tmp3 = tmp3 + tmp2
8550 end do
8551 tmp3 = tmp3 * tmp1
8552 res1 = res1 + tmp3*src_m(indn)
8553 end do
8554 dst_l(indj) = res1
8555 ! k != 0
8556 do k = 1, j
8557 res1 = zero
8558 res2 = zero
8559 do n = k, p
8560 ! Offset for src_m
8561 indn = n*n + n + 1
8562 tmp1 = vscales(indn+k) * ((-one)**(n)) * &
8563 & dble(2*j+1) * fact(j-k+1) * fact(n+k+1) / &
8564 & src_sk(n+1) / vscales(indj+k) * dst_si(j+1)
8565 tmp3 = zero
8566 do l = k, min(j, n)
8567 tmp2 = ((-two)**(-l)) / &
8568 & fact(l+k+1)*fact(2*l+1)/ &
8569 & fact(l+1)/fact(l-k+1)/fact(j-l+1)/fact(n-l+1)* &
8570 & (work(j+n-l+1) / (z**l))
8571 tmp3 = tmp3 + tmp2
8572 end do
8573 tmp3 = tmp3 * tmp1
8574 res1 = res1 + tmp3*src_m(indn+k)
8575 res2 = res2 + tmp3*src_m(indn-k)
8576 end do
8577 dst_l(indj+k) = res1
8578 dst_l(indj-k) = res2
8579 end do
8580 end do
8581 ! Update output if beta is non-zero
8582 else
8583 do j = 0, p
8584 ! Offset for dst_m
8585 indj = j*j + j + 1
8586 ! k = 0
8587 k = 0
8588 res1 = zero
8589 do n = 0, p
8590 ! Offset for src_m
8591 indn = n*n + n + 1
8592 tmp1 = vscales(indn) * ((-one)**(n)) * &
8593 & dble(2*j+1) * fact(j+1) * fact(n+1) / &
8594 & src_sk(n+1) / vscales(indj) * dst_si(j+1)
8595 tmp3 = zero
8596 do l = 0, min(j, n)
8597 tmp2 = ((-two)**(-l)) / &
8598 & fact(l+1)*fact(2*l+1)/ &
8599 & fact(l+1)/fact(l+1)/fact(j-l+1)/fact(n-l+1)* &
8600 & (work(j+n-l+1) / (z**l))
8601 tmp3 = tmp3 + tmp2
8602 end do
8603 tmp3 = tmp3 * tmp1
8604 res1 = res1 + tmp3*src_m(indn)
8605 end do
8606 dst_l(indj) = beta*dst_l(indj) + res1
8607 ! k != 0
8608 do k = 1, j
8609 res1 = zero
8610 res2 = zero
8611 do n = k, p
8612 ! Offset for src_m
8613 indn = n*n + n + 1
8614 tmp1 = vscales(indn+k) * ((-one)**(n)) * &
8615 & dble(2*j+1) * fact(j-k+1) * fact(n+k+1) / &
8616 & src_sk(n+1) / vscales(indj+k) * dst_si(j+1)
8617 tmp3 = zero
8618 do l = k, min(j, n)
8619 tmp2 = ((-two)**(-l)) / &
8620 & fact(l+k+1)*fact(2*l+1)/ &
8621 & fact(l+1)/fact(l-k+1)/fact(j-l+1)/fact(n-l+1)* &
8622 & (work(j+n-l+1) / (z**l))
8623 tmp3 = tmp3 + tmp2
8624 end do
8625 tmp3 = tmp3 * tmp1
8626 res1 = res1 + tmp3*src_m(indn+k)
8627 res2 = res2 + tmp3*src_m(indn-k)
8628 end do
8629 dst_l(indj+k) = beta*dst_l(indj+k) + res1
8630 dst_l(indj-k) = beta*dst_l(indj-k) + res2
8631 end do
8632 end do
8633 end if
8634 ! If harmonics are located at the same point
8635 else
8636 r1 = zero
8637 ! Overwrite output if beta is zero
8638 if (beta .eq. zero) then
8639 !r1 = src_r / dst_r
8640 tmp1 = alpha * r1
8641 do j = 0, p
8642 indj = j*j + j + 1
8643 do k = indj-j, indj+j
8644 dst_l(k) = tmp1 * src_m(k)
8645 end do
8646 tmp1 = tmp1 * r1
8647 end do
8648 ! Update output if beta is non-zero
8649 else
8650 !r1 = src_r / dst_r
8651 tmp1 = alpha * r1
8652 do j = 0, p
8653 indj = j*j + j + 1
8654 do k = indj-j, indj+j
8655 dst_l(k) = beta*dst_l(k) + tmp1*src_m(k)
8656 end do
8657 tmp1 = tmp1 * r1
8658 end do
8659 end if
8660 end if
8661end subroutine fmm_m2l_bessel_ztranslate_work
8662
8684subroutine fmm_m2l_bessel_ztranslate_adj(z, src_sk, dst_si, p, vscales, alpha, &
8685 & src_m, beta, dst_l)
8686 ! Inputs
8687 integer, intent(in) :: p
8688 real(dp), intent(in) :: z, src_sk(p+1), dst_si(p+1), vscales((p+1)*(p+1)), &
8689 & alpha, src_m((p+1)*(p+1)), beta
8690 ! Output
8691 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
8692 ! Temporary workspace
8693 real(dp) :: work(2*p+1)
8694 complex(dp) :: work_complex(2*p+1)
8695 ! Call corresponding work routine
8696 call fmm_m2l_bessel_ztranslate_adj_work(z, src_sk, dst_si, p, vscales, &
8697 & alpha, src_m, beta, dst_l, work, work_complex)
8698end subroutine fmm_m2l_bessel_ztranslate_adj
8699
8723subroutine fmm_m2l_bessel_ztranslate_adj_work(z, src_sk, dst_si, p, vscales, &
8724 & alpha, src_m, beta, dst_l, work, work_complex)
8725 use complex_bessel
8726 ! Inputs
8727 integer, intent(in) :: p
8728 real(dp), intent(in) :: z, src_sk(p+1), dst_si(p+1), vscales((p+1)*(p+1)), &
8729 & alpha, src_m((p+1)*(p+1)), beta
8730 ! Output
8731 real(dp), intent(inout) :: dst_l((p+1)*(p+1))
8732 ! Temporary workspace
8733 real(dp), intent(out), target :: work(2*p+1)
8734 complex(dp), intent(out) :: work_complex(2*p+1)
8735 ! Local variables
8736 real(dp) :: r1, tmp1, tmp2, tmp3, res1, res2
8737 integer :: j, k, n, indj, indn, l, NZ, ierr
8738 ! Pointers for temporary values of powers
8739 real(dp) :: fact(2*p+1)
8740 complex(dp) :: complex_argument
8741 ! In case alpha is zero just do a proper scaling of output
8742 if (alpha .eq. zero) then
8743 if (beta .eq. zero) then
8744 dst_l = zero
8745 else
8746 dst_l = beta * dst_l
8747 end if
8748 return
8749 end if
8750 ! Get factorials
8751 fact(1) = one
8752 do j = 1, 2*p
8753 fact(j+1) = dble(j) * fact(j)
8754 end do
8755 ! Now alpha is non-zero
8756 ! If harmonics have different centers
8757 if (z .ne. 0) then
8758 complex_argument = z
8759 call cbesk(complex_argument, pt5, 1, 1, work_complex(1), nz, ierr)
8760 work(1) = sqrt(2/(pi*z)) * real(work_complex(1))
8761 if (p .gt. 0) then
8762 call cbesk(complex_argument, 1.5d0, 1, 2*p, &
8763 & work_complex(2:2*p+1), nz, ierr)
8764 work(2:2*p+1) = sqrt(2/(pi*z)) * real(work_complex(2:2*p+1))
8765 end if
8766 ! Do actual M2M
8767 ! Overwrite output if beta is zero
8768 if (beta .eq. zero) then
8769 dst_l = zero
8770 do n = 0, p
8771 ! Offset for dst_m
8772 indn = n*n + n + 1
8773 ! k = 0
8774 k = 0
8775 res1 = zero
8776 do j = 0, p
8777 ! Offset for src_m
8778 indj = j*j + j + 1
8779 tmp1 = vscales(indn) * ((-one)**(n)) * &
8780 & dble(2*j+1) * fact(j+1) * fact(n+1) * &
8781 & src_sk(j+1) / vscales(indj) / dst_si(n+1)
8782 tmp3 = zero
8783 do l = 0, min(j, n)
8784 tmp2 = ((-two)**(-l)) / &
8785 & fact(l+1)*fact(2*l+1)/ &
8786 & fact(l+1)/fact(l+1)/fact(j-l+1)/fact(n-l+1)* &
8787 & (work(j+n-l+1) / (z**l))
8788 tmp3 = tmp3 + tmp2
8789 end do
8790 tmp3 = tmp3 * tmp1
8791 res1 = res1 + tmp3*src_m(indj)
8792 end do
8793 dst_l(indn) = res1
8794 ! k != 0
8795 do k = 1, n
8796 res1 = zero
8797 res2 = zero
8798 do j = k, p
8799 ! Offset for src_m
8800 indj = j*j + j + 1
8801 tmp1 = vscales(indn+k) * ((-one)**(n)) * &
8802 & dble(2*j+1) * fact(j-k+1) * fact(n+k+1) * &
8803 & src_sk(j+1) / vscales(indj+k) / dst_si(n+1)
8804 tmp3 = zero
8805 do l = k, min(j, n)
8806 tmp2 = ((-two)**(-l)) / &
8807 & fact(l+k+1)*fact(2*l+1)/ &
8808 & fact(l+1)/fact(l-k+1)/fact(j-l+1)/fact(n-l+1)* &
8809 & (work(j+n-l+1) / (z**l))
8810 tmp3 = tmp3 + tmp2
8811 end do
8812 tmp3 = tmp3 * tmp1
8813 res1 = res1 + tmp3*src_m(indj+k)
8814 res2 = res2 + tmp3*src_m(indj-k)
8815 end do
8816 dst_l(indn+k) = res1
8817 dst_l(indn-k) = res2
8818 end do
8819 end do
8820 ! Update output if beta is non-zero
8821 else
8822 do n = 0, p
8823 ! Offset for dst_m
8824 indn = n*n + n + 1
8825 ! k = 0
8826 k = 0
8827 res1 = zero
8828 do j = 0, p
8829 ! Offset for src_m
8830 indj = j*j + j + 1
8831 tmp1 = vscales(indn) * ((-one)**(n)) * &
8832 & dble(2*j+1) * fact(j+1) * fact(n+1) * &
8833 & src_sk(j+1) / vscales(indj) / dst_si(n+1)
8834 tmp3 = zero
8835 do l = 0, min(j, n)
8836 tmp2 = ((-two)**(-l)) / &
8837 & fact(l+1)*fact(2*l+1)/ &
8838 & fact(l+1)/fact(l+1)/fact(j-l+1)/fact(n-l+1)* &
8839 & (work(j+n-l+1) / (z**l))
8840 tmp3 = tmp3 + tmp2
8841 end do
8842 tmp3 = tmp3 * tmp1
8843 res1 = res1 + tmp3*src_m(indj)
8844 end do
8845 dst_l(indn) = beta*dst_l(indn) + res1
8846 ! k != 0
8847 do k = 1, n
8848 res1 = zero
8849 res2 = zero
8850 do j = k, p
8851 ! Offset for src_m
8852 indj = j*j + j + 1
8853 tmp1 = vscales(indn+k) * ((-one)**(n)) * &
8854 & dble(2*j+1) * fact(j-k+1) * fact(n+k+1) * &
8855 & src_sk(j+1) / vscales(indj+k) / dst_si(n+1)
8856 tmp3 = zero
8857 do l = k, min(j, n)
8858 tmp2 = ((-two)**(-l)) / &
8859 & fact(l+k+1)*fact(2*l+1)/ &
8860 & fact(l+1)/fact(l-k+1)/fact(j-l+1)/fact(n-l+1)* &
8861 & (work(j+n-l+1) / (z**l))
8862 tmp3 = tmp3 + tmp2
8863 end do
8864 tmp3 = tmp3 * tmp1
8865 res1 = res1 + tmp3*src_m(indj+k)
8866 res2 = res2 + tmp3*src_m(indj-k)
8867 end do
8868 dst_l(indn+k) = beta*dst_l(indn+k) + res1
8869 dst_l(indn-k) = beta*dst_l(indn-k) + res2
8870 end do
8871 end do
8872 end if
8873 ! If harmonics are located at the same point
8874 else
8875 r1 = zero
8876 ! Overwrite output if beta is zero
8877 if (beta .eq. zero) then
8878 !r1 = src_r / dst_r
8879 tmp1 = alpha * r1
8880 do j = 0, p
8881 indj = j*j + j + 1
8882 do k = indj-j, indj+j
8883 dst_l(k) = tmp1 * src_m(k)
8884 end do
8885 tmp1 = tmp1 * r1
8886 end do
8887 ! Update output if beta is non-zero
8888 else
8889 !r1 = src_r / dst_r
8890 tmp1 = alpha * r1
8891 do j = 0, p
8892 indj = j*j + j + 1
8893 do k = indj-j, indj+j
8894 dst_l(k) = beta*dst_l(k) + tmp1*src_m(k)
8895 end do
8896 tmp1 = tmp1 * r1
8897 end do
8898 end if
8899 end if
8901
8928subroutine fmm_m2l_rotation_adj(c, src_r, dst_r, pl, pm, vscales, &
8929 & m2l_ztranslate_adj_coef, alpha, src_l, beta, dst_m)
8930 ! Inputs
8931 integer, intent(in) :: pl, pm
8932 real(dp), intent(in) :: c(3), src_r, dst_r, vscales((pm+pl+1)**2), &
8933 & m2l_ztranslate_adj_coef(pl+1, pl+1, pm+1), alpha, &
8934 & src_l((pl+1)*(pl+1)), beta
8935 ! Output
8936 real(dp), intent(inout) :: dst_m((pm+1)*(pm+1))
8937 ! Temporary workspace
8938 real(dp) :: work(6*max(pm, pl)**2 + 19*max(pm, pl) + 8)
8939 ! Call corresponding work routine
8940 call fmm_m2l_rotation_adj_work(c, src_r, dst_r, pl, pm, vscales, &
8941 & m2l_ztranslate_adj_coef, alpha, src_l, beta, dst_m, work)
8942end subroutine fmm_m2l_rotation_adj
8943
8972subroutine fmm_m2l_rotation_adj_work(c, src_r, dst_r, pl, pm, vscales, &
8973 & m2l_ztranslate_adj_coef, alpha, src_l, beta, dst_m, work)
8974 ! Inputs
8975 integer, intent(in) :: pl, pm
8976 real(dp), intent(in) :: c(3), src_r, dst_r, vscales((pm+pl+1)**2), &
8977 & m2l_ztranslate_adj_coef(pl+1, pl+1, pm+1), alpha, &
8978 & src_l((pl+1)*(pl+1)), beta
8979 ! Output
8980 real(dp), intent(inout) :: dst_m((pm+1)*(pm+1))
8981 ! Temporary workspace
8982 real(dp), intent(out), target :: &
8983 & work(6*max(pm, pl)**2 + 19*max(pm, pl) + 8)
8984 ! Local variables
8985 real(dp) :: rho, ctheta, stheta, cphi, sphi
8986 integer :: m, n, p
8987 ! Pointers for temporary values of harmonics
8988 real(dp), pointer :: tmp_ml(:), tmp_ml2(:), vcos(:), vsin(:)
8989 ! Covert Cartesian coordinates into spherical
8990 call carttosph(c, rho, ctheta, stheta, cphi, sphi)
8991 ! If no need for rotations, just do translation along z
8992 if (stheta .eq. 0) then
8993 ! Workspace here is (pl+2)*(pl+1)
8994 call fmm_m2l_ztranslate_adj_work(c(3), src_r, dst_r, pl, pm, vscales, &
8995 & m2l_ztranslate_adj_coef, alpha, src_l, beta, dst_m, work)
8996 return
8997 end if
8998 ! Prepare pointers
8999 p = max(pm, pl)
9000 m = (p+1)**2
9001 n = 4*m + 5*p ! 4*p*p + 13*p + 4
9002 tmp_ml(1:m) => work(n+1:n+m) ! 5*p*p + 15*p + 5
9003 n = n + m
9004 tmp_ml2(1:m) => work(n+1:n+m) ! 6*p*p + 17*p + 6
9005 n = n + m
9006 m = p + 1
9007 vcos => work(n+1:n+m) ! 6*p*p + 18*p + 7
9008 n = n + m
9009 vsin => work(n+1:n+m) ! 6*p*p + 19*p + 8
9010 ! Compute arrays of cos and sin that are needed for rotations of harmonics
9011 call trgev(cphi, sphi, p, vcos, vsin)
9012 ! Rotate around OZ axis (work array might appear in the future)
9013 call fmm_sph_rotate_oz_adj_work(pl, vcos, vsin, alpha, src_l, zero, tmp_ml)
9014 ! Perform rotation in the OXZ plane, work size is 4*pl*pl+13*pl+4
9015 call fmm_sph_rotate_oxz_work(pl, ctheta, -stheta, one, tmp_ml, zero, &
9016 & tmp_ml2, work)
9017 ! OZ translation, workspace here is (pl+2)*(pl+1)
9018 call fmm_m2l_ztranslate_adj_work(rho, src_r, dst_r, pl, pm, vscales, &
9019 & m2l_ztranslate_adj_coef, one, tmp_ml2, zero, tmp_ml, work)
9020 ! Backward rotation in the OXZ plane, work size is 4*pm*pm+13*pm+4
9021 call fmm_sph_rotate_oxz_work(pm, ctheta, stheta, one, tmp_ml, zero, &
9022 & tmp_ml2, work)
9023 ! Backward rotation around OZ axis (work array might appear in the future)
9024 call fmm_sph_rotate_oz_work(pm, vcos, vsin, one, tmp_ml2, beta, dst_m)
9025end subroutine fmm_m2l_rotation_adj_work
9026
9027end module ddx_harmonics
9028
Compile-time constants and definitions.
Harmonics-related core routines.
subroutine ylmscale(p, vscales, v4pi2lp1, vscales_rel)
Compute scaling factors of real normalized spherical harmonics.
subroutine fmm_m2p_adj(c, src_q, dst_r, p, vscales_rel, beta, dst_m)
Adjoint M2P operation.
subroutine fmm_l2p_adj_work(c, src_q, dst_r, p, vscales_rel, beta, dst_l, work)
Adjoint L2P.
subroutine fmm_m2m_ztranslate(z, src_r, dst_r, p, vscales, vcnk, alpha, src_m, beta, dst_m)
Direct M2M translation over OZ axis.
subroutine fmm_l2l_scale_adj(src_r, dst_r, p, alpha, src_l, beta, dst_l)
Adjoint scale L2L, when spherical harmonics are centered in the same point.
subroutine fmm_m2m_bessel_rotation_adj_work(c, src_sk, dst_sk, p, vscales, alpha, src_m, beta, dst_m, work, work_complex)
Direct M2M translation by 4 rotations and 1 translation.
subroutine fmm_m2l_bessel_rotation(c, src_r, dst_r, kappa, p, vscales, alpha, src_m, beta, dst_l)
Direct M2M translation by 4 rotations and 1 translation.
subroutine fmm_m2l_rotation(c, src_r, dst_r, pm, pl, vscales, m2l_ztranslate_coef, alpha, src_m, beta, dst_l)
Direct M2L translation by 4 rotations and 1 translation.
subroutine fmm_l2l_bessel_derivative_ztranslate_work(src_si, p, vscales, alpha, src_l, beta, dst_l)
Direct M2M translation over OZ axis.
subroutine fmm_m2m_ztranslate_work(z, src_r, dst_r, p, vscales, vcnk, alpha, src_m, beta, dst_m, work)
Direct M2M translation over OZ axis.
subroutine fmm_sph_rotate_oxz_work(p, ctheta, stheta, alpha, src, beta, dst, work)
Transform spherical harmonics in the OXZ plane.
subroutine fmm_m2m_ztranslate_adj(z, src_r, dst_r, p, vscales, vcnk, alpha, src_m, beta, dst_m)
Adjoint M2M translation over OZ axis.
subroutine fmm_m2m_bessel_derivative_ztranslate_work(src_sk, p, vscales, alpha, src_m, beta, dst_m)
Direct M2M translation over OZ axis.
subroutine fmm_l2l_rotation_adj(c, src_r, dst_r, p, vscales, vfact, alpha, src_l, beta, dst_l)
Adjoint L2L translation by 4 rotations and 1 translation.
subroutine fmm_m2l_ztranslate(z, src_r, dst_r, pm, pl, vscales, m2l_ztranslate_coef, alpha, src_m, beta, dst_l)
Direct M2L translation over OZ axis.
subroutine fmm_m2p_bessel_adj(c, src_q, dst_r, kappa, p, vscales, beta, dst_m)
Adjoint M2P operation.
subroutine fmm_m2m_rotation(c, src_r, dst_r, p, vscales, vcnk, alpha, src_m, beta, dst_m)
Direct M2M translation by 4 rotations and 1 translation.
subroutine fmm_l2l_scale(src_r, dst_r, p, alpha, src_l, beta, dst_l)
Scale L2L, when spherical harmonics are centered in the same point.
subroutine fmm_sph_rotate_oxz(p, ctheta, stheta, alpha, src, beta, dst)
Transform spherical harmonics in the OXZ plane.
subroutine fmm_m2l_ztranslate_adj_work(z, src_r, dst_r, pl, pm, vscales, m2l_ztranslate_adj_coef, alpha, src_l, beta, dst_m, work)
Adjoint M2L translation over OZ axis.
subroutine fmm_m2m_bessel_ztranslate(z, src_sk, dst_sk, p, vscales, alpha, src_m, beta, dst_m)
Direct M2M translation over OZ axis.
subroutine fmm_l2p(c, src_r, p, vscales, alpha, src_l, beta, dst_v)
Accumulate potential, induced by local spherical harmonics.
subroutine polleg_work(ctheta, stheta, p, vplm, work)
Compute associated Legendre polynomials.
subroutine fmm_l2l_ztranslate_work(z, src_r, dst_r, p, vscales, vfact, alpha, src_l, beta, dst_l, work)
Direct L2L translation over OZ axis.
subroutine fmm_l2p_adj(c, src_q, dst_r, p, vscales_rel, beta, dst_l)
Adjoint of L2P.
subroutine fmm_m2l_bessel_ztranslate(z, src_sk, dst_si, p, vscales, alpha, src_m, beta, dst_l)
Direct M2M translation over OZ axis.
subroutine fmm_p2m(c, src_q, dst_r, p, vscales, beta, dst_m)
Accumulate a multipole expansion induced by a particle of a given charge.
subroutine fmm_l2l_bessel_ztranslate_adj(z, src_si, dst_si, p, vscales, alpha, src_l, beta, dst_l)
Direct M2M translation over OZ axis.
subroutine fmm_m2m_scale(src_r, dst_r, p, alpha, src_m, beta, dst_m)
Scale M2M, when spherical harmonics are centered in the same point.
subroutine fmm_sph_rotate_oz_adj(p, vcos, vsin, alpha, src, beta, dst)
Rotate spherical harmonics around OZ axis in an opposite direction.
subroutine fmm_l2l_ztranslate_adj(z, src_r, dst_r, p, vscales, vfact, alpha, src_l, beta, dst_l)
Adjoint L2L translation over OZ axis.
subroutine fmm_l2l_bessel_ztranslate_work(z, src_si, dst_si, p, vscales, alpha, src_l, beta, dst_l, work, work_complex)
Direct M2M translation over OZ axis.
subroutine fmm_sph_rotate_oz_adj_work(p, vcos, vsin, alpha, src, beta, dst)
Rotate spherical harmonics around OZ axis in an opposite direction.
subroutine trgev(cphi, sphi, p, vcos, vsin)
Compute arrays of and .
subroutine fmm_m2p_adj_work(c, src_q, dst_r, p, vscales_rel, beta, dst_m, work)
Adjoint M2P operation.
subroutine carttosph(x, rho, ctheta, stheta, cphi, sphi)
Convert input cartesian coordinate into spherical coordinate.
subroutine fmm_l2l_bessel_rotation_adj_work(c, src_si, dst_si, p, vscales, alpha, src_l, beta, dst_l, work, work_complex)
Direct L2L translation by 4 rotations and 1 translation.
subroutine fmm_m2p_bessel_adj_work(c, src_q, dst_sk, p, vscales, beta, dst_m, work_complex, work)
Adjoint M2P operation.
subroutine fmm_m2m_bessel_rotation_work(c, src_sk, dst_sk, p, vscales, alpha, src_m, beta, dst_m, work, work_complex)
Direct M2M translation by 4 rotations and 1 translation.
subroutine ylmbas(x, rho, ctheta, stheta, cphi, sphi, p, vscales, vylm, vplm, vcos, vsin)
Compute all spherical harmonics up to a given degree at a given point.
subroutine fmm_l2l_rotation_work(c, src_r, dst_r, p, vscales, vfact, alpha, src_l, beta, dst_l, work)
Direct L2L translation by 4 rotations and 1 translation.
subroutine fmm_m2l_ztranslate_work(z, src_r, dst_r, pm, pl, vscales, m2l_ztranslate_coef, alpha, src_m, beta, dst_l, work)
Direct M2L translation over OZ axis.
subroutine polleg(ctheta, stheta, p, vplm)
Compute associated Legendre polynomials.
subroutine fmm_l2l_bessel_ztranslate_adj_work(z, src_si, dst_si, p, vscales, alpha, src_l, beta, dst_l, work, work_complex)
Direct M2M translation over OZ axis.
subroutine fmm_m2p_work(c, src_r, p, vscales_rel, alpha, src_m, beta, dst_v, work)
Accumulate potential, induced by multipole spherical harmonics.
subroutine fmm_l2l_rotation(c, src_r, dst_r, p, vscales, vfact, alpha, src_l, beta, dst_l)
Direct L2L translation by 4 rotations and 1 translation.
subroutine fmm_l2l_bessel_rotation(c, src_r, dst_r, kappa, p, vscales, alpha, src_l, beta, dst_l)
Direct L2L translation by 4 rotations and 1 translation.
subroutine fmm_m2l_rotation_adj(c, src_r, dst_r, pl, pm, vscales, m2l_ztranslate_adj_coef, alpha, src_l, beta, dst_m)
Adjoint M2L translation by 4 rotations and 1 translation.
subroutine fmm_sph_rotate_oz(p, vcos, vsin, alpha, src, beta, dst)
Rotate spherical harmonics around OZ axis.
subroutine fmm_m2m_rotation_work(c, src_r, dst_r, p, vscales, vcnk, alpha, src_m, beta, dst_m, work)
Direct M2M translation by 4 rotations and 1 translation.
subroutine fmm_sph_transform(p, r1, alpha, src, beta, dst)
Transform coefficients of spherical harmonics to a new cartesion system.
subroutine fmm_l2p_bessel(c, src_r, p, vscales, alpha, src_l, beta, dst_v)
Accumulate potential, induced by local spherical harmonics.
subroutine fmm_m2p_mat(c, r, p, vscales, mat)
Accumulate potentials, induced by each multipole spherical harmonic.
subroutine fmm_m2l_rotation_work(c, src_r, dst_r, pm, pl, vscales, m2l_ztranslate_coef, alpha, src_m, beta, dst_l, work)
Direct M2L translation by 4 rotations and 1 translation.
subroutine fmm_l2l_ztranslate_adj_work(z, src_r, dst_r, p, vscales, vfact, alpha, src_l, beta, dst_l, work)
Adjoint L2L translation over OZ axis.
subroutine fmm_l2l_bessel_rotation_adj(c, src_r, dst_r, kappa, p, vscales, alpha, src_l, beta, dst_l)
Direct L2L translation by 4 rotations and 1 translation.
subroutine fmm_l2l_rotation_adj_work(c, src_r, dst_r, p, vscales, vfact, alpha, src_l, beta, dst_l, work)
Adjoint L2L translation by 4 rotations and 1 translation.
subroutine fmm_m2m_bessel_ztranslate_adj(z, src_sk, dst_sk, p, vscales, alpha, src_m, beta, dst_m)
Direct M2M translation over OZ axis.
subroutine fmm_l2l_bessel_ztranslate(z, src_si, dst_si, p, vscales, alpha, src_l, beta, dst_l)
Direct M2M translation over OZ axis.
subroutine fmm_m2l_bessel_ztranslate_adj_work(z, src_sk, dst_si, p, vscales, alpha, src_m, beta, dst_l, work, work_complex)
Direct M2M translation over OZ axis.
subroutine fmm_m2m_bessel_ztranslate_adj_work(z, src_sk, dst_sk, p, vscales, alpha, src_m, beta, dst_m, work, work_complex)
Direct M2M translation over OZ axis.
subroutine fmm_m2m_ztranslate_adj_work(z, src_r, dst_r, p, vscales, vcnk, alpha, src_m, beta, dst_m, work)
Adjoint M2M translation over OZ axis.
subroutine fmm_l2l_ztranslate(z, src_r, dst_r, p, vscales, vfact, alpha, src_l, beta, dst_l)
Direct L2L translation over OZ axis.
subroutine fmm_sph_transform_work(p, r1, alpha, src, beta, dst, work)
Transform spherical harmonics to a new cartesion system of coordinates.
subroutine fmm_m2l_bessel_rotation_work(c, src_sk, dst_si, p, vscales, alpha, src_m, beta, dst_l, work, work_complex)
Direct M2M translation by 4 rotations and 1 translation.
subroutine fmm_m2l_bessel_rotation_adj(c, src_r, dst_r, kappa, p, vscales, alpha, src_m, beta, dst_l)
Direct M2M translation by 4 rotations and 1 translation.
subroutine fmm_l2l_bessel_rotation_work(c, src_si, dst_si, p, vscales, alpha, src_l, beta, dst_l, work, work_complex)
Direct L2L translation by 4 rotations and 1 translation.
subroutine fmm_m2l_rotation_adj_work(c, src_r, dst_r, pl, pm, vscales, m2l_ztranslate_adj_coef, alpha, src_l, beta, dst_m, work)
Adjoint M2L translation by 4 rotations and 1 translation.
subroutine fmm_m2p(c, src_r, p, vscales_rel, alpha, src_m, beta, dst_v)
Accumulate potential, induced by multipole spherical harmonics.
subroutine fmm_m2l_bessel_rotation_adj_work(c, src_sk, dst_si, p, vscales, alpha, src_m, beta, dst_l, work, work_complex)
Direct M2M translation by 4 rotations and 1 translation.
subroutine fmm_m2m_scale_adj(src_r, dst_r, p, alpha, src_m, beta, dst_m)
Adjoint scale M2M, when spherical harmonics are centered in the same point.
subroutine fmm_m2m_bessel_ztranslate_work(z, src_sk, dst_sk, p, vscales, alpha, src_m, beta, dst_m, work, work_complex)
Direct M2M translation over OZ axis.
subroutine fmm_m2l_bessel_ztranslate_work(z, src_sk, dst_si, p, vscales, alpha, src_m, beta, dst_l, work, work_complex)
Direct M2M translation over OZ axis.
subroutine fmm_m2l_bessel_ztranslate_adj(z, src_sk, dst_si, p, vscales, alpha, src_m, beta, dst_l)
Direct M2M translation over OZ axis.
subroutine fmm_m2p_bessel(c, src_r, p, vscales, alpha, src_m, beta, dst_v)
Accumulate potential, induced by multipole spherical harmonics.
subroutine fmm_constants(dmax, pm, pl, vcnk, m2l_ztranslate_coef, m2l_ztranslate_adj_coef)
Compute FMM-related constants.
subroutine fmm_p2m_work(c, src_q, dst_r, p, vscales, beta, dst_m, work)
Accumulate a multipole expansion induced by a particle of a given charge.
subroutine fmm_sph_rotate_oz_work(p, vcos, vsin, alpha, src, beta, dst)
Rotate spherical harmonics around OZ axis.
subroutine fmm_l2p_bessel_work(c, p, vscales, SI_ri, alpha, src_l, beta, dst_v, work_complex, work)
Accumulate Bessel potential, induced by local spherical harmonics.
subroutine fmm_m2m_bessel_rotation(c, src_r, dst_r, kappa, p, vscales, alpha, src_m, beta, dst_m)
Direct M2M translation by 4 rotations and 1 translation.
subroutine fmm_m2l_ztranslate_adj(z, src_r, dst_r, pl, pm, vscales, m2l_ztranslate_adj_coef, alpha, src_l, beta, dst_m)
Adjoint M2L translation over OZ axis.
subroutine fmm_m2m_rotation_adj_work(c, src_r, dst_r, p, vscales, vcnk, alpha, src_m, beta, dst_m, work)
Adjoint M2M translation by 4 rotations and 1 translation.
subroutine fmm_m2p_bessel_work(c, p, vscales, SK_ri, alpha, src_m, beta, dst_v, work_complex, work)
Accumulate Bessel potential, induced by multipole spherical harmonics.
subroutine fmm_m2m_bessel_rotation_adj(c, src_r, dst_r, kappa, p, vscales, alpha, src_m, beta, dst_m)
Direct M2M translation by 4 rotations and 1 translation.
subroutine fmm_l2p_work(c, src_r, p, vscales_rel, alpha, src_l, beta, dst_v, work)
Accumulate potential, induced by local spherical harmonics.
subroutine fmm_m2m_rotation_adj(c, src_r, dst_r, p, vscales, vcnk, alpha, src_m, beta, dst_m)
Adjoint M2M translation by 4 rotations and 1 translation.